This file produces additional analyses plots for sample ZF Stages, iteration of archR result.
params_df <- data.frame(param = names(params),
value = as.vector(unlist(params)))
knitr::kable(params_df)
| param | value |
|---|---|
| use_title | (Improved) archR result analysis report for CTSS in Zebrafish |
| sample_name | ZF Stages |
| iqw_order_by_median | TRUE |
| data_path_on_disk | |
| result_path_on_disk | |
| result_dir_name | |
| do_plot_seq_image | TRUE |
| do_plot_arch | TRUE |
| do_iqw_tpm_plots | TRUE |
| do_bedfile_write | TRUE |
sample_names <- c("64_cells", "dome_30perc_epiboly", "prim6")
archR_zf_best_run <- vector("list", length(sample_names))
names(archR_zf_best_run) <- sample_names
zf_2013_result <- vector("list", length(sample_names))
names(zf_2013_result) <- sample_names
zf_nepal2013_bed_info <- vector("list", length(sample_names))
names(zf_nepal2013_bed_info) <- sample_names
seqs_clusters_as_list <- vector("list", length(sample_names))
names(seqs_clusters_as_list) <- sample_names
seqs_clusters_as_list_ordered <- vector("list", length(sample_names))
names(seqs_clusters_as_list_ordered) <- sample_names
perSample_CAGEobj <- vector("list", length(sample_names))
names(perSample_CAGEobj) <- sample_names
perSample_peakAnno <- vector("list", length(sample_names))
names(perSample_peakAnno) <- sample_names
samarth_df <- vector("list", length(sample_names))
names(samarth_df) <- sample_names
##
phastCons_scores <- vector("list", length(sample_names))
names(phastCons_scores) <- sample_names
##
## lists storing plots
##
## store iqw+tpm plots
perSample_pl <- vector("list", length(sample_names))
names(perSample_pl) <- sample_names
## store architectures' plots
perSample_arch <- vector("list", length(sample_names))
names(perSample_arch) <- sample_names
## store architectures' plots combined
perSample_arch_combined <- vector("list", length(sample_names))
names(perSample_arch_combined) <- sample_names
perSample_arch_posStrand <- vector("list", length(sample_names))
names(perSample_arch_posStrand) <- sample_names
perSample_arch_negStrand <- vector("list", length(sample_names))
names(perSample_arch_negStrand) <- sample_names
## store result_directory path
result_dir_path <- vector("list", length(sample_names))
names(result_dir_path) <- sample_names
## store go plots
perSample_go <- vector("list", length(sample_names))
names(perSample_go) <- sample_names
## store clusters
perSample_archR_clusts <- vector("list", length(sample_names))
names(perSample_archR_clusts) <- sample_names
## store overlaps info
perCluster_overlaps_perSample <- vector("list", length(sample_names))
names(perCluster_overlaps_perSample) <- sample_names
################################################################################
## Per section some variables need to be set. These are done here
# txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
## CAGEr object processing
# read TagClusters information from corresponding RDS file for sample
# All samples in one RDS object
cager_obj <- file.path("/mnt/storage/cage_datasets/",
"danio_rerio/danRer10/cager_objects",
paste0("myCAGEset_Normalised_TagC_drerio_",
"danRer10_Nepal_processedMergedSamples.rds"))
ok_chr_names <- paste0("chr", 1:25)
for(sn in sample_names){
result_dir_path[[sn]] <- file.path(archR_org_results_path,
paste0(sn, "_results"))
}
for(sn in sample_names){
archR_zf_best_run[[sn]] <-
file.path(archR_org_results_path,
paste0("archR_result_zebrafish_nepal2012_", sn,
"_modSelType_stability_chunkSize_1000_",
"bound_1e-06_collate_FTFFF"))
if(file.exists(archR_zf_best_run[[sn]])){
zf_2013_result[[sn]] <- readRDS(file.path(archR_zf_best_run[[sn]],
"archRresult.rds"))
}else{
stop("Check if file exists. ", archR_zf_best_run[[sn]])
}
## Bed Info -- IQW and dominant TPM values
bed_fname <- file.path(archR_org_data_path, paste0("samarth_zf_TC_sample_Drerio_", sn, "_flank_500.bed"))
print(bed_fname)
zf_nepal2013_bed_info[[sn]] <- read.delim(file = bed_fname,
sep = "\t", header = FALSE,
col.names = c("chr", "start", "end", "IQW",
"domTPM", "strand"))
}
## [1] "data/zebrafish-nepal2013/new/samarth_zf_TC_sample_Drerio_64_cells_flank_500.bed"
## [1] "data/zebrafish-nepal2013/new/samarth_zf_TC_sample_Drerio_dome_30perc_epiboly_flank_500.bed"
## [1] "data/zebrafish-nepal2013/new/samarth_zf_TC_sample_Drerio_prim6_flank_500.bed"
## SAMPLE: 64_cells
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/64_cells_results
## ℹ Using saved GRanges obj from CAGEr object
## >> preparing features information... 2023-01-09 16:32:06
## >> identifying nearest features... 2023-01-09 16:32:07
## >> calculating distance from peak to TSS... 2023-01-09 16:32:07
## >> assigning genomic annotation... 2023-01-09 16:32:07
## >> adding gene annotation... 2023-01-09 16:32:18
## 'select()' returned 1:many mapping between keys and columns
## >> assigning chromosome lengths 2023-01-09 16:32:18
## >> done... 2023-01-09 16:32:18
## SAMPLE: dome_30perc_epiboly
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/dome_30perc_epiboly_results
## ℹ Using saved GRanges obj from CAGEr object
## >> preparing features information... 2023-01-09 16:32:18
## >> identifying nearest features... 2023-01-09 16:32:18
## >> calculating distance from peak to TSS... 2023-01-09 16:32:19
## >> assigning genomic annotation... 2023-01-09 16:32:19
## >> adding gene annotation... 2023-01-09 16:32:20
## 'select()' returned 1:many mapping between keys and columns
## >> assigning chromosome lengths 2023-01-09 16:32:20
## >> done... 2023-01-09 16:32:20
## SAMPLE: prim6
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/prim6_results
## ℹ Using saved GRanges obj from CAGEr object
## >> preparing features information... 2023-01-09 16:32:20
## >> identifying nearest features... 2023-01-09 16:32:20
## >> calculating distance from peak to TSS... 2023-01-09 16:32:20
## >> assigning genomic annotation... 2023-01-09 16:32:20
## >> adding gene annotation... 2023-01-09 16:32:21
## 'select()' returned 1:many mapping between keys and columns
## >> assigning chromosome lengths 2023-01-09 16:32:22
## >> done... 2023-01-09 16:32:22
By default, the clusters are ordered by their median interquantile widths. One can choose to order by the mean interquantile widths by setting paramater iqw_order_by_median to FALSE.
## Sample 1, 64 cells
sn <- sample_names[1]
itr <- 5
use_aggl <- 'ward.D'
use_dist <- 'cor'
iter5_clusts_reord <- archR::collate_archR_result(
result = zf_2013_result[[sn]],
iter = itr, clust_method = 'hc', aggl_method = use_aggl,
dist_method = use_dist, regularize = FALSE, topn = 50,
flag = list(debugFlag = FALSE, verboseFlag = TRUE), collate = FALSE,
return_order = TRUE)
## Zebrafish result final clustering already uses cor + complete linkage
## So the result looks already good.
## these are in default archR ordering
clust_archR_ord_list <- archR::get_seqs_clust_list(
seqs_clust_lab = zf_2013_result[[ sn ]]$seqsClustLabels[[itr]])
## these are now ordered by the hc ordering
clust_hc_ord_list <- lapply(iter5_clusts_reord$order, function(x){
clust_archR_ord_list[[x]]
})
ordered_arch_pl <- archR::plot_arch_for_clusters(
seqs = zf_2013_result[[ sn ]]$rawSeqs,
clust_list = clust_hc_ord_list, pos_lab = -45:150,
xt_freq = 5, set_titles = FALSE, show = FALSE, bits_yax = "auto")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the ggseqlogo package.
## Please report the issue at <https://github.com/omarwagih/ggseqlogo/issues>.
ordered_arch_pl2 <- lapply(rev(ordered_arch_pl), function(pl){
pl <- pl +
ggplot2::theme(axis.text = ggplot2::element_text(size = 0),
axis.text.x = ggplot2::element_text(
angle = 0, vjust = 2, hjust = 0.5),
axis.text.y = ggplot2::element_text(vjust = 0.5),
axis.title.y = ggplot2::element_text(size = 0),
axis.ticks.length = ggplot2::unit(0.00, "cm"),
plot.margin = ggplot2::unit(c(-0.1,0,-0.4,-0.4), "cm"))
})
sam_foo <- cowplot::plot_grid(plotlist = ordered_arch_pl2, ncol = 1)
use_cutk <- 5
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/64_cells_results
fname <- file.path(result_dir_path[[sn]], paste0(sn, "_dend_arch_list_",
use_dist, "_", use_aggl, "_cut", use_cutk))
sam_foo2 <- plot_dend_arch(arch_plot = sam_foo, fname = fname, use_cutk = use_cutk,
clusts = iter5_clusts_reord, use_ht = 30, plot_png = FALSE,
lwd = 0.4, repel = TRUE, show_labels = TRUE, labels_track_height = 0.25,
rect = TRUE, rect_fill = TRUE,
color_labels_by_k = TRUE)
## Registered S3 method overwritten by 'dendextend':
## method from
## rev.hclust vegan
# sam_foo2
###
temp_clusts <- cutree(iter5_clusts_reord, k = use_cutk)
names(temp_clusts) <- NULL
## Make further few clusters
nCl <- length(unique(temp_clusts))
temp_clusts[c(14,28,30,31)] <- nCl + 1
existing_clust <- sort(unique(temp_clusts))
for(i in seq_along(existing_clust)){
idx <- which(temp_clusts == existing_clust[i])
temp_clusts[idx] <- i
}
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/64_cells_results
fname2 <- file.path(result_dir_path[[sn]], paste0(sn, "_dend_arch_list_",
use_dist, "_", use_aggl, "_",
use_cutk, "clusters_final"))
## Also need to show alongside, how the final clusters' seqlogos look
clust_list <- lapply(unique(temp_clusts), function(x){which(temp_clusts == x)})
seqs_clusters_as_list[[sn]] <- archR::collate_clusters(clust_list, archR::get_seqs_clust_list(zf_2013_result[[sn]]$seqsClustLabels[[itr]]))
##
use_color <- scales::hue_pal()(length(unique(temp_clusts)))
sam_foo2 <- plot_dend_arch(arch_plot = sam_foo, fname = fname2, use_ht = 60,
use_cutk = use_cutk,#length(unique(temp_clusts)),
clusts = iter5_clusts_reord, rect = TRUE, rect_fill = TRUE,
label_cols = use_color[temp_clusts[iter5_clusts_reord$order]],
k_colors = use_color,
clust_assignment = clust_list,
new_clusts = seqs_clusters_as_list[[sn]],
rawSeqs = zf_2013_result[[sn]]$rawSeqs,
palette = FALSE, plot_png = FALSE)
## Warning in get_col(col, k): Length of color vector was longer than the number of
## clusters - first k elements are used
# use_color <- scales::hue_pal()(length(unique(temp_clusts)))
# sam_foo2 <- plot_dend_arch(arch_plot = sam_foo, fname = fname2,
# use_cutk = use_cutk,
# #length(unique(temp_clusts)),
# clusts = iter5_clusts_reord, rect = TRUE, rect_fill = TRUE,
# label_cols = use_color[temp_clusts[iter5_clusts_reord$order]],
# k_colors = use_color, plot_png = FALSE,
# palette = FALSE)
##
# clust_list <- lapply(unique(temp_clusts), function(x){which(temp_clusts == x)})
# seqs_clusters_as_list[[sn]] <- archR::collate_clusters(clust_list, archR::get_seqs_clust_list(zf_2013_result[[sn]]$seqsClustLabels[[itr]]))
###
cluster_medians_IQW <- unlist(lapply(seqs_clusters_as_list[[sn]], function(x){
median(zf_nepal2013_bed_info[[sn]]$IQW[x])
}))
cluster_means_IQW <- unlist(lapply(seqs_clusters_as_list[[sn]], function(x){
mean(zf_nepal2013_bed_info[[sn]]$IQW[x])
}))
if(iqw_order_by_median){
ascending_order_IQW <- sort(cluster_medians_IQW, decreasing = FALSE,
index.return = TRUE)
}else{
ascending_order_IQW <- sort(cluster_means_IQW, decreasing = FALSE,
index.return = TRUE)
}
##
seqs_clusters_as_list_ordered[[sn]] <-
lapply(ascending_order_IQW$ix,
function(x){
seqs_clusters_as_list[[sn]][[x]]
})
perSample_archR_clusts[[sn]] <- seqs_clusters_as_list_ordered[[sn]]
## Sample 2, dome_30perc_epiboly needs final clustering
sn <- sample_names[2]
itr <- 5
use_aggl <- 'ward.D'
use_dist <- 'cor'
# iter5_clusts <- archR::collate_archR_result(
# result = zf_2013_result[[sample_names[1]]],
# iter = itr, clust_method = 'hc', aggl_method = use_aggl,
# dist_method = use_dist, regularize = TRUE, topn = 50,
# flag = list(debugFlag = TRUE, verboseFlag = TRUE), collate = TRUE,
# return_order = FALSE, minClusters = 4)
iter5_clusts_reord <- archR::collate_archR_result(
result = zf_2013_result[[sn]],
iter = itr, clust_method = 'hc', aggl_method = use_aggl,
dist_method = use_dist, regularize = TRUE, topn = 50,
flag = list(debugFlag = FALSE, verboseFlag = TRUE), collate = FALSE,
return_order = TRUE)
# iter5_clusts_reord1 <- archR::collate_archR_result(
# result = zf_2013_result[[sn]],
# iter = itr, clust_method = 'hc', aggl_method = "ward.D",
# dist_method = "euclid", regularize = TRUE, topn = 50,
# flag = list(debugFlag = TRUE, verboseFlag = TRUE), collate = FALSE,
# return_order = TRUE)
#
# iter5_clusts_reord2 <- archR::collate_archR_result(
# result = zf_2013_result[[sn]],
# iter = itr, clust_method = 'hc', aggl_method = "complete",
# dist_method = "cor", regularize = TRUE, topn = 50,
# flag = list(debugFlag = TRUE, verboseFlag = TRUE), collate = FALSE,
# return_order = TRUE)
## Zebrafish result final clustering already uses cor + complete linkage
## So the result looks already good.
## these are in default archR ordering
clust_archR_ord_list <- archR::get_seqs_clust_list(
seqs_clust_lab = zf_2013_result[[ sn ]]$seqsClustLabels[[itr]])
## these are now ordered by the hc ordering
clust_hc_ord_list <- lapply(iter5_clusts_reord$order, function(x){
clust_archR_ord_list[[x]]
})
ordered_arch_pl <- archR::plot_arch_for_clusters(
seqs = zf_2013_result[[ sn ]]$rawSeqs,
clust_list = clust_hc_ord_list, pos_lab = -45:150,
xt_freq = 5, set_titles = FALSE, show = FALSE, bits_yax = "auto")
ordered_arch_pl2 <- lapply(rev(ordered_arch_pl), function(pl){
pl <- pl +
ggplot2::theme(axis.text = ggplot2::element_text(size = 0),
axis.text.x = ggplot2::element_text(
angle = 0, vjust = 2, hjust = 0.5),
axis.text.y = ggplot2::element_text(vjust = 0.5),
axis.title.y = ggplot2::element_text(size = 0),
axis.ticks.length = ggplot2::unit(0.00, "cm"),
plot.margin = ggplot2::unit(c(-0.1,0,-0.4,-0.4), "cm"))
})
sam_foo <- cowplot::plot_grid(plotlist = ordered_arch_pl2, ncol = 1)
use_cutk <- 12
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/dome_30perc_epiboly_results
fname <- file.path(result_dir_path[[sn]], paste0(sn, "_dend_arch_list_",
use_dist, "_", use_aggl, "_cut", use_cutk))
sam_foo2 <- plot_dend_arch(arch_plot = sam_foo, fname = fname, use_cutk = use_cutk,
clusts = iter5_clusts_reord, use_ht = 30, plot_png = FALSE,
lwd = 0.4, repel = TRUE, show_labels = TRUE, labels_track_height = 0.25,
rect = TRUE, rect_fill = TRUE,
color_labels_by_k = TRUE)
# sam_foo2
###
temp_clusts <- cutree(iter5_clusts_reord, k = use_cutk)
names(temp_clusts) <- NULL
## Make further few clusters
nCl <- length(unique(temp_clusts))
temp_clusts[c(29)] <- temp_clusts[c(22)]
existing_clust <- sort(unique(temp_clusts))
for(i in seq_along(existing_clust)){
idx <- which(temp_clusts == existing_clust[i])
temp_clusts[idx] <- i
}
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/dome_30perc_epiboly_results
fname2 <- file.path(result_dir_path[[sn]], paste0(sn, "_dend_arch_list_",
use_dist, "_", use_aggl, "_",
use_cutk, "clusters_final"))
## Also need to show alongside, how the final clusters' seqlogos look
clust_list <- lapply(unique(temp_clusts), function(x){which(temp_clusts == x)})
seqs_clusters_as_list[[sn]] <- archR::collate_clusters(clust_list, archR::get_seqs_clust_list(zf_2013_result[[sn]]$seqsClustLabels[[itr]]))
##
use_color <- scales::hue_pal()(length(unique(temp_clusts)))
sam_foo2 <- plot_dend_arch(arch_plot = sam_foo, fname = fname2, use_ht = 60,
use_cutk = use_cutk,#length(unique(temp_clusts)),
clusts = iter5_clusts_reord, rect = TRUE, rect_fill = TRUE,
label_cols = use_color[temp_clusts[iter5_clusts_reord$order]],
k_colors = use_color,
clust_assignment = clust_list,
new_clusts = seqs_clusters_as_list[[sn]],
rawSeqs = zf_2013_result[[sn]]$rawSeqs,
palette = FALSE, plot_png = FALSE)
## Warning in get_col(col, k): Length of color vector was shorter than the number
## of clusters - color vector was recycled
# use_color <- scales::hue_pal()(length(unique(temp_clusts)))
# sam_foo2 <- plot_dend_arch(arch_plot = sam_foo, fname = fname2,
# use_cutk = use_cutk,
# #length(unique(temp_clusts)),
# clusts = iter5_clusts_reord, rect = TRUE, rect_fill = TRUE,
# label_cols = use_color[temp_clusts[iter5_clusts_reord$order]],
# k_colors = use_color, plot_png = FALSE,
# palette = FALSE)
#
#
# clust_list <- lapply(unique(temp_clusts), function(x){which(temp_clusts == x)})
# seqs_clusters_as_list[[sn]] <- archR::collate_clusters(clust_list, archR::get_seqs_clust_list(zf_2013_result[[ sn ]]$seqsClustLabels[[itr]]))
###
cluster_medians_IQW <- unlist(lapply(seqs_clusters_as_list[[sn]], function(x){
median(zf_nepal2013_bed_info[[sn]]$IQW[x])
}))
cluster_means_IQW <- unlist(lapply(seqs_clusters_as_list[[sn]], function(x){
mean(zf_nepal2013_bed_info[[sn]]$IQW[x])
}))
if(iqw_order_by_median){
ascending_order_IQW <- sort(cluster_medians_IQW, decreasing = FALSE,
index.return = TRUE)
}else{
ascending_order_IQW <- sort(cluster_means_IQW, decreasing = FALSE,
index.return = TRUE)
}
##
seqs_clusters_as_list_ordered[[sn]] <-
lapply(ascending_order_IQW$ix,
function(x){
seqs_clusters_as_list[[sn]][[x]]
})
perSample_archR_clusts[[sn]] <- seqs_clusters_as_list_ordered[[sn]]
sn <- sample_names[3]
itr <- 5
use_aggl <- 'ward.D'
use_dist <- 'cor'
iter5_clusts_reord <- archR::collate_archR_result(
result = zf_2013_result[[sn]],
iter = itr, clust_method = 'hc', aggl_method = use_aggl,
dist_method = use_dist, regularize = FALSE, topn = 50,
flag = list(debugFlag = FALSE, verboseFlag = TRUE), collate = FALSE,
return_order = TRUE)
clust_archR_ord_list <- archR::get_seqs_clust_list(
seqs_clust_lab = zf_2013_result[[ sn ]]$seqsClustLabels[[itr]])
## these are now ordered by the hc ordering
clust_hc_ord_list <- lapply(iter5_clusts_reord$order, function(x){
clust_archR_ord_list[[x]]
})
ordered_arch_pl <- archR::plot_arch_for_clusters(
seqs = zf_2013_result[[ sn ]]$rawSeqs,
clust_list = clust_hc_ord_list, pos_lab = -45:150,
xt_freq = 5, set_titles = FALSE, show = FALSE, bits_yax = "auto")
ordered_arch_pl2 <- lapply(rev(ordered_arch_pl), function(pl){
pl <- pl +
ggplot2::theme(axis.text = ggplot2::element_text(size = 0),
axis.text.x = ggplot2::element_text(
angle = 0, vjust = 2, hjust = 0.5),
axis.text.y = ggplot2::element_text(vjust = 0.5),
axis.title.y = ggplot2::element_text(size = 0),
axis.ticks.length = ggplot2::unit(0.00, "cm"),
plot.margin = ggplot2::unit(c(-0.1,0,-0.4,-0.4), "cm"))
})
sam_foo <- cowplot::plot_grid(plotlist = ordered_arch_pl2, ncol = 1)
use_cutk <- 12
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/prim6_results
fname <- file.path(result_dir_path[[sn]], paste0(sn, "_dend_arch_list_",
use_dist, "_", use_aggl, "_cut", use_cutk))
sam_foo2 <- plot_dend_arch(arch_plot = sam_foo, fname = fname, use_cutk = use_cutk,
clusts = iter5_clusts_reord, use_ht = 30, plot_png = FALSE,
lwd = 0.4, repel = TRUE, show_labels = TRUE, labels_track_height = 0.25,
rect = TRUE, rect_fill = TRUE,
color_labels_by_k = TRUE)
# sam_foo2
###
temp_clusts <- cutree(iter5_clusts_reord, k = use_cutk)
names(temp_clusts) <- NULL
## Make further few clusters
nCl <- length(unique(temp_clusts))
temp_clusts[c(21,2,29)] <- temp_clusts[c(19)]
temp_clusts[c(8)] <- temp_clusts[c(11)]
existing_clust <- sort(unique(temp_clusts))
for(i in seq_along(existing_clust)){
idx <- which(temp_clusts == existing_clust[i])
temp_clusts[idx] <- i
}
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/prim6_results
fname2 <- file.path(result_dir_path[[sn]], paste0(sn, "_dend_arch_list_",
use_dist, "_", use_aggl, "_",
use_cutk, "clusters_final"))
## Also need to show alongside, how the final clusters' seqlogos look
clust_list <- lapply(unique(temp_clusts), function(x){which(temp_clusts == x)})
seqs_clusters_as_list[[sn]] <- archR::collate_clusters(clust_list, archR::get_seqs_clust_list(zf_2013_result[[sn]]$seqsClustLabels[[itr]]))
##
use_color <- scales::hue_pal()(length(unique(temp_clusts)))
sam_foo2 <- plot_dend_arch(arch_plot = sam_foo, fname = fname2, use_ht = 60,
use_cutk = use_cutk,#length(unique(temp_clusts)),
clusts = iter5_clusts_reord, rect = TRUE, rect_fill = TRUE,
label_cols = use_color[temp_clusts[iter5_clusts_reord$order]],
k_colors = use_color,
clust_assignment = clust_list,
new_clusts = seqs_clusters_as_list[[sn]],
rawSeqs = zf_2013_result[[sn]]$rawSeqs,
palette = FALSE, plot_png = FALSE)
## Warning in get_col(col, k): Length of color vector was shorter than the number
## of clusters - color vector was recycled
# use_color <- scales::hue_pal()(length(unique(temp_clusts)))
# sam_foo2 <- plot_dend_arch(arch_plot = sam_foo, fname = fname2,
# use_cutk = use_cutk,
# #length(unique(temp_clusts)),
# clusts = iter5_clusts_reord, rect = TRUE, rect_fill = TRUE,
# label_cols = use_color[temp_clusts[iter5_clusts_reord$order]],
# k_colors = use_color, plot_png = FALSE,
# palette = FALSE)
#
#
# clust_list <- lapply(unique(temp_clusts), function(x){which(temp_clusts == x)})
# seqs_clusters_as_list[[sn]] <- archR::collate_clusters(clust_list, archR::get_seqs_clust_list(zf_2013_result[[ sn ]]$seqsClustLabels[[itr]]))
###
###
cluster_medians_IQW <- unlist(lapply(seqs_clusters_as_list[[sn]], function(x){
median(zf_nepal2013_bed_info[[sn]]$IQW[x])
}))
cluster_means_IQW <- unlist(lapply(seqs_clusters_as_list[[sn]], function(x){
mean(zf_nepal2013_bed_info[[sn]]$IQW[x])
}))
if(iqw_order_by_median){
ascending_order_IQW <- sort(cluster_medians_IQW, decreasing = FALSE,
index.return = TRUE)
}else{
ascending_order_IQW <- sort(cluster_means_IQW, decreasing = FALSE,
index.return = TRUE)
}
##
seqs_clusters_as_list_ordered[[sn]] <-
lapply(ascending_order_IQW$ix,
function(x){
seqs_clusters_as_list[[sn]][[x]]
})
perSample_archR_clusts[[sn]] <- seqs_clusters_as_list_ordered[[sn]]
## Display as carousel
fnames <- unlist(lapply(sample_names, function(x){
respath <- file.path(archR_org_results_path, paste0(x, "_results"))
flist <- list.files(respath, pattern = paste0(x, "_dend_arch_list*"))
last3 <- unlist(lapply(flist, function(y){
substr(y, nchar(y)-2, nchar(y))
}))
idx <- which(last3 == "png")
# stopifnot(length(flist[idx]) == 1)
file.path(respath, flist[idx])
}))
cP1 <- htmlwidgets::JS("function(slick,index) {
return '<a>'+(index+1)+'</a>';
}")
slick_clustering_imgs <- slickR::slickR(obj = fnames, slideId = "clusterings",
slideType = "img", width="100%", height = "800px") +
slickR::settings(initialSlide = 0,
infinite = FALSE,
focusOnSelect = TRUE,
dots = TRUE,
customPaging = cP1)
slick_clustering_imgs
for(sn in sample_names) {
clust_lens <- unlist(lapply(seqs_clusters_as_list_ordered[[sn]], length))
message("Ordered list lengths: ", paste(clust_lens, collapse = " "))
message("Original list lengths:",
paste(unlist(lapply(seqs_clusters_as_list[[sn]], length)), collapse = " "))
##
clust_lab <- rep("0", length(zf_2013_result[[sn]]$rawSeqs))
clust_names <- sort(as.character(1:length(seqs_clusters_as_list_ordered[[sn]])))
for(i in seq_along(seqs_clusters_as_list_ordered[[sn]])){
clust_lab[seqs_clusters_as_list_ordered[[sn]][[i]] ] <- clust_names[i]
}
### Get DF ob BED information
samarth_df[[sn]] <- data.frame(chr = zf_nepal2013_bed_info[[sn]]$chr,
start = zf_nepal2013_bed_info[[sn]]$start,
end = zf_nepal2013_bed_info[[sn]]$end,
strand = zf_nepal2013_bed_info[[sn]]$strand,
IQW = zf_nepal2013_bed_info[[sn]]$IQW,
domTPM = zf_nepal2013_bed_info[[sn]]$domTPM,
# phast = phastCons_scores[[sn]],
clust_ID = clust_lab)
}
## Ordered list lengths: 1785 2356 1039 1002 6473 5613
## Original list lengths:5613 1785 6473 2356 1002 1039
## Ordered list lengths: 66 34 315 5886 390 7155 3621 2585 53 116 571
## Original list lengths:7155 3621 390 5886 571 66 34 2585 53 315 116
## Ordered list lengths: 51 70 104 32 181 2503 2000 4383 1621 9021
## Original list lengths:4383 9021 2503 1621 2000 32 51 70 181 104
if(do_bedfile_write){
message("Task: Write cluster-wise BED files to disk")
for(sn in sample_names){
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
##
bedFilesPath <- file.path(result_dir_path[[sn]], "Cluster_BED_tracks")
stopifnot(check_and_create_dir(bedFilesPath))
message("Writing cluster BED track files at:", bedFilesPath)
##
#### Write bed to disk -- each cluster in a separate bed file
cat(paste0("\n\n### Individual cluster track BED files [", sn, "]\n\n"))
for(lo in seq_along(seqs_clusters_as_list_ordered[[sn]])){
##
bedFilename <- file.path(bedFilesPath,
paste0("zf_nepal2013_TC_sample_", sn,
"_cluster", lo, ".bed"))
# print(bedFilename)
seq_ids <- seqs_clusters_as_list_ordered[[sn]][[lo]]
limit_df <- samarth_df[[sn]][seq_ids,]
track.name <- paste0(sn, "_danRer10_", "clust", lo)
write_as_track_bed(limit_df, seq_ids, track_name = track.name,
bedFilename = bedFilename)
dload_text <- xfun::embed_file(path = bedFilename,
text = paste("Download coordinates for cluster", lo, " as browser track"))
cat(paste0("\n<", dload_text$name," href=\"", dload_text$attribs$href, "\" download=\"", dload_text$attribs$download, "\">", dload_text$children[[1]][1],
"</a>\n"))
}
#####
message("Preparing strand-separated files")
## Strand-wise separate files per cluster
cat(paste0("\n\n### Strand-separated individual cluster track BED files[", sn, "]\n\n"))
for(lo in seq_along(seqs_clusters_as_list_ordered[[sn]])){
## plus strand
bedFilename_plus <- file.path(bedFilesPath,
paste0("zf_nepal2013_TC_sample_", sn,
"_cluster", lo, "_plus_strand.bed"))
##
chosen_idx_plus_strand <- get_strand_specific_indices(df = samarth_df[[sn]],
seq_ids_in_clust = seqs_clusters_as_list_ordered[[sn]][[lo]],
strand_val = "+")
##
if(length(chosen_idx_plus_strand) < 1){
cat(paste0("<a href= >Empty", "</a>"))
}else{
##
limit_df <- samarth_df[[sn]][chosen_idx_plus_strand,]
track.name <- paste0(sn, "_danRer10_", "clust", lo, "_plus_strand")
write_as_track_bed(limit_df, chosen_idx_plus_strand, track_name = track.name,
bedFilename = bedFilename_plus)
dload_text <- xfun::embed_file(bedFilename_plus,
text = paste("Download coordinates for cluster",
lo, "(+ strand) as browser track"))
cat(paste0("\n<", dload_text$name," href=\"", dload_text$attribs$href, "\" download=\"", dload_text$attribs$download, "\">", dload_text$children[[1]][1],
"</a>, "))
}
## minus strand
bedFilename_minus <- file.path(bedFilesPath,
paste0("zf_nepal2013_TC_sample_",
sn, "_cluster",
lo, "_minus_strand.bed"))
##
##
chosen_idx_minus_strand <- get_strand_specific_indices(df = samarth_df[[sn]],
seq_ids_in_clust = seqs_clusters_as_list_ordered[[sn]][[lo]],
strand_val = "-")
##
if(length(chosen_idx_minus_strand) < 1){
cat(paste0("<a href= >Empty", "</a>\n"))
}else{
limit_df <- samarth_df[[sn]][chosen_idx_minus_strand,]
track.name <- paste0(sn, "_danRer10_", "clust", lo, "_plus_strand")
write_as_track_bed(limit_df, chosen_idx_minus_strand, track_name = track.name,
bedFilename = bedFilename_minus)
dload_text <- xfun::embed_file(bedFilename_minus,
text = paste("Download coordinates for cluster",
lo, "(- strand) as browser track"))
cat(paste0("<", dload_text$name," href=\"", dload_text$attribs$href, "\" download=\"", dload_text$attribs$download, "\">", dload_text$children[[1]][1], "</a>\n"))
}
##
}
}
##
dload_all <- xfun::embed_dir(bedFilesPath,
text = paste("Download all clusters as separate browser track files (zip)"))
cat(paste0("### All cluster track files (one zipped folder of all BED files)\n\n"))
cat(paste0("\n<", dload_all$name," href=\"", dload_all$attribs$href, "\" download=\"", dload_all$attribs$download, "\">", dload_all$children[[1]][1],
"</a>\n"))
##
}else{
message("do_bedfile_write is FALSE")
}
Download coordinates for cluster 1 as browser track
Download coordinates for cluster 2 as browser track
Download coordinates for cluster 3 as browser track
Download coordinates for cluster 4 as browser track
Download coordinates for cluster 1 (+ strand) as browser track, Download coordinates for cluster 1 (- strand) as browser track
Download coordinates for cluster 2 (+ strand) as browser track, Download coordinates for cluster 2 (- strand) as browser track
Download coordinates for cluster 3 (+ strand) as browser track, Download coordinates for cluster 3 (- strand) as browser track
Download coordinates for cluster 4 (+ strand) as browser track, Download coordinates for cluster 4 (- strand) as browser track
Download coordinates for cluster 5 (+ strand) as browser track, Download coordinates for cluster 5 (- strand) as browser track
Download coordinates for cluster 6 (+ strand) as browser track, Download coordinates for cluster 6 (- strand) as browser track
Download coordinates for cluster 1 as browser track
Download coordinates for cluster 2 as browser track
Download coordinates for cluster 3 as browser track
Download coordinates for cluster 4 as browser track
Download coordinates for cluster 5 as browser track
Download coordinates for cluster 6 as browser track
Download coordinates for cluster 7 as browser track
Download coordinates for cluster 8 as browser track
Download coordinates for cluster 9 as browser track
Download coordinates for cluster 1 (+ strand) as browser track, Download coordinates for cluster 1 (- strand) as browser track
Download coordinates for cluster 2 (+ strand) as browser track, Download coordinates for cluster 2 (- strand) as browser track
Download coordinates for cluster 3 (+ strand) as browser track, Download coordinates for cluster 3 (- strand) as browser track
Download coordinates for cluster 4 (+ strand) as browser track, Download coordinates for cluster 4 (- strand) as browser track
Download coordinates for cluster 5 (+ strand) as browser track, Download coordinates for cluster 5 (- strand) as browser track
Download coordinates for cluster 6 (+ strand) as browser track, Download coordinates for cluster 6 (- strand) as browser track
Download coordinates for cluster 7 (+ strand) as browser track, Download coordinates for cluster 7 (- strand) as browser track
Download coordinates for cluster 8 (+ strand) as browser track, Download coordinates for cluster 8 (- strand) as browser track
Download coordinates for cluster 9 (+ strand) as browser track, Download coordinates for cluster 9 (- strand) as browser track
Download coordinates for cluster 10 (+ strand) as browser track, Download coordinates for cluster 10 (- strand) as browser track
Download coordinates for cluster 11 (+ strand) as browser track, Download coordinates for cluster 11 (- strand) as browser track
Download coordinates for cluster 1 as browser track
Download coordinates for cluster 2 as browser track
Download coordinates for cluster 3 as browser track
Download coordinates for cluster 4 as browser track
Download coordinates for cluster 5 as browser track
Download coordinates for cluster 6 as browser track
Download coordinates for cluster 7 as browser track
Download coordinates for cluster 8 as browser track
Download coordinates for cluster 1 (+ strand) as browser track, Download coordinates for cluster 1 (- strand) as browser track
Download coordinates for cluster 2 (+ strand) as browser track, Download coordinates for cluster 2 (- strand) as browser track
Download coordinates for cluster 3 (+ strand) as browser track, Download coordinates for cluster 3 (- strand) as browser track
Download coordinates for cluster 4 (+ strand) as browser track, Download coordinates for cluster 4 (- strand) as browser track
Download coordinates for cluster 5 (+ strand) as browser track, Download coordinates for cluster 5 (- strand) as browser track
Download coordinates for cluster 6 (+ strand) as browser track, Download coordinates for cluster 6 (- strand) as browser track
Download coordinates for cluster 7 (+ strand) as browser track, Download coordinates for cluster 7 (- strand) as browser track
Download coordinates for cluster 8 (+ strand) as browser track, Download coordinates for cluster 8 (- strand) as browser track
Download coordinates for cluster 9 (+ strand) as browser track, Download coordinates for cluster 9 (- strand) as browser track
Download coordinates for cluster 10 (+ strand) as browser track, Download coordinates for cluster 10 (- strand) as browser track ### All cluster track files (one zipped folder of all BED files)
####
if(do_IQW_TPM_plots){
message("Task: Producing IQW_TPM_PhastCons scores combined plot")
for(sn in sample_names){
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
pl_iqw <- get_iqw_ord_plot(iqw = TRUE, y_axis_text = TRUE,
samarth_df = samarth_df[[sn]],
seqs_clust = seqs_clusters_as_list_ordered[[sn]])
pl_iqw <- pl_iqw + ggeasy::easy_all_text_size(size=18) +
NULL +
ggeasy::easy_y_axis_labels_size(size=18) +
ggeasy::easy_x_axis_labels_size(size=18) +
NULL
##
pl_tpm <- get_iqw_ord_plot(tpm = TRUE, y_axis_text = FALSE,
samarth_df = samarth_df[[sn]],
seqs_clust = seqs_clusters_as_list_ordered[[sn]])
pl_tpm <- pl_tpm + ggeasy::easy_all_text_size(size=18) +
NULL +
# ggeasy::easy_y_axis_labels_size(size=18) +
ggeasy::easy_x_axis_labels_size(size=18) +
NULL
##
# pl_phast <- get_iqw_ord_plot(phast = TRUE, y_axis_text = FALSE,
# samarth_df = samarth_df[[sn]],
# seqs_clust = seqs_clusters_as_list_ordered[[sn]])
# pl_phast <- pl_phast + ggeasy::easy_all_text_size(size=18) +
# NULL +
# # ggeasy::easy_y_axis_labels_size(size=18) +
# ggeasy::easy_x_axis_labels_size(size=18) +
# NULL
## IQW ordered plots
iqw_tpm_ord_pls <- pl_iqw | pl_tpm
# iqw_tpm_phast_ord_pls <- pl_iqw | pl_tpm | pl_phast
perSample_pl[[sn]] <- iqw_tpm_ord_pls #iqw_tpm_phast_ord_pls
# title_str <- paste0("Sample_", sn, "_IQW_TPM_phast_plot")
title_str <- paste0("Sample_", sn, "_IQW_TPM_plot")
pl_w_title <- perSample_pl[[sn]] + plot_annotation(title = title_str)
cowplot::save_plot(filename = file.path(result_dir_path[[sn]],
paste0(title_str, ".pdf")),
plot = pl_w_title,
base_height = 12, base_width = 6
)
# cowplot::save_plot(filename = file.path(result_dir_path[[sn]],
# paste0(title_str, ".png")),
# plot = pl_w_title,
# base_height = 17, base_width = 8
# )
# ggsave(filename = file.path(result_dir_path[[sn]],
# paste0(title_str, ".pdf")),
# plot = pl_w_title,
# width = 10, height = 7)
# ggsave(filename = file.path(result_dir_path[[sn]],
# paste0(title_str, ".png")),
# plot = pl_w_title, device = "png", dpi = 300,
# width = 10, height = 7)
# print(pl_w_title)
}
}else{
message("do_iqw_tpm_plot is FALSE")
}
Sequence logos of clusters ordered by their median interquantile width (ascending order). These are included as part of the combined panels. See 64 cells stage, dome 30% epiboly stage and prim6 stage.
#fig.width=11, fig.height=10, out.width="1200px", out.height="800px",
##
if(do_plot_arch){
use_ht <- list(15, 30, 20)
names(use_ht) <- sample_names
for(sn in sample_names){
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
##
message("Generating architectures for clusters of sequences...")
fname <- file.path(result_dir_path[[sn]],
paste0("Architectures_0-max.pdf"))
iter5_zf_arch_list <- lapply(seq_along(perSample_archR_clusts[[sn]]),
function(y){
x <- perSample_archR_clusts[[sn]][[y]]
pl <- archR::plot_ggseqlogo_of_seqs(as.character(zf_2013_result[[sn]]$rawSeqs[x]),
pos_lab = -45:150, bits_yax = "auto", title = NULL)
pl + theme(axis.text = element_text(size=10),
# axis.text.x = element_text(angle=0, vjust = 2, hjust = 0.5),
# axis.text.x = element_text(angle=90, vjust = 0, hjust = 0),
axis.text.y = element_text(vjust = 0.5),
axis.title.y = element_text(size=10),
axis.ticks.length = unit(0.04, "cm"),
plot.margin = unit(c(0,0.1,-0.4,0.1), "cm")) +
ggplot2::scale_y_continuous(sec.axis = dup_axis(name = paste0("C",y), labels = NULL))
# pl + theme(plot.margin = unit(c(0,0.2,-0.5,0.1), "cm"))
# pl + theme(plot.margin = unit(c(0,0,-0.3,0), "cm"))
})
# print(length(iter5_dm_arch_list))
r1c1 <- cowplot::plot_grid(plotlist = iter5_zf_arch_list, ncol = 1)
# cowplot::save_plot(fname, plot = r1c1, limitsize = FALSE, base_width = 30,
# nrow = length(seqs_clusters_as_list_ordered[[sn]]))
cowplot::ggsave2(fname, plot= r1c1, width=25, height=use_ht[[sn]], units="cm",
dpi = 600, limitsize = FALSE)
## save PNGs
# for(p in 1:length(iter5_zf_arch_list)){
# this_result_dir_path <- file.path(result_dir_path[[sn]], "arch_png")
# stopifnot(check_and_create_dir(this_result_dir_path))
# fname <- file.path(this_result_dir_path,
# paste0("Architecture_clust", p, "_0-max.png"))
# cowplot::ggsave2(fname, plot= iter5_zf_arch_list[[p]],
# width=25, height=3, units="cm", dpi = 300)
# }
# r1c1
perSample_arch_combined[[sn]] <- r1c1
perSample_arch[[sn]] <- iter5_zf_arch_list
if(file.exists(fname)){
knitr::include_graphics(fname)
}
}
}else{
message("do_plot_arch is FALSE")
}
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/64_cells_results
## Generating architectures for clusters of sequences...
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/dome_30perc_epiboly_results
##
## Generating architectures for clusters of sequences...
##
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/prim6_results
##
## Generating architectures for clusters of sequences...
##
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
##
perSample_entrezList <- lapply(seq_along(perSample_archR_clusts), function(x){
as_df_entrez_id <- as.data.frame(perSample_peakAnno[[x]])$ENTREZID
sam <- lapply(perSample_archR_clusts[[x]], function(y){
temp <- as_df_entrez_id[y]
temp[which(!is.na(temp))]
})
names(sam) <- paste0("C", seq_along(sam))
sam
})
names(perSample_entrezList) <- names(perSample_archR_clusts)
sn <- sample_names[1]
peakAnno_df <- as.data.frame(perSample_peakAnno[[sn]])
## peakAnno_df here doesn't have columns tpm.dominant_ctss
peakAnno_df$tpm.dominant_ctss <- zf_nepal2013_bed_info[[sn]]$domTPM
unique_genes <- unique(peakAnno_df$SYMBOL)
sumTPM_across_TC_per_gene <- lapply(unique_genes, function(x){
sum(peakAnno_df$tpm.dominant_ctss[which(peakAnno_df$SYMBOL == x)])
})
names(sumTPM_across_TC_per_gene) <- unique_genes
genes1 <- lapply(1:length(perSample_archR_clusts[[sn]]), function(x){
##
all_genenames <-
peakAnno_df[unlist(perSample_archR_clusts[[sn]][x]), c("GENENAME", "ENTREZID", "geneId", "SYMBOL", "annotation", "width", "tpm.dominant_ctss")]
## To sort by motif scores, get motif scores first
motif_scores <- suppressMessages(get_motif_scores(zf_2013_result, perSample_archR_clusts, sn, clust_id = x))
##
##
anno_terms1 <- all_genenames$annotation
anno_terms_splits <- strsplit(anno_terms1, " ")
anno_terms <- unlist(lapply(anno_terms_splits, function(x){
if(is.na(x[1])){ "NA";
}else if(grepl("Intron", x[1]) || grepl("Exon", x[1])){
paste0(x[1], " (", paste(x[3:6], collapse = " "))
}else if(grepl("Promoter", x[1])){
x[1]
}else{ paste(x[1], x[2]) }
}))
##
sorted <- sort(motif_scores, index.return=TRUE,
decreasing = TRUE)
##
##
## To sort alphabetically by Gene Symbol
naStr <- unlist(lapply(all_genenames$SYMBOL, function(x){
if(is.na(x)){
"NA"
}else{
x
}
}))
all_genenames$SYMBOL <- naStr
sorted <- sort(all_genenames$SYMBOL, index.return=TRUE)
sam2 <- unlist(lapply(all_genenames$SYMBOL, function(x) {
if(x != "NA"){
sumTPM_across_TC_per_gene[[x]]
}else{
NA
}
}))
##
# Displays as datatable which is sortable, searchable
print_df <- data.frame("Symbol" = all_genenames$SYMBOL[sorted$ix],
"Weblink" =
paste0('<a href="https://www.ensembl.org/Danio_rerio/Gene/Summary?db=core;g=',
all_genenames$geneId[sorted$ix], '" target="_blank">',
all_genenames$GENENAME[sorted$ix],
'</a> '),
"Annot" = anno_terms[sorted$ix],
"domTPM" = format(round(all_genenames$tpm.dominant_ctss[sorted$ix], 3), nsmall=3),
"%%.domTPM" = format(round(100*(all_genenames$tpm.dominant_ctss[sorted$ix]/sam2[sorted$ix]), 2), nsmall=2),
"Match.score"= form_str(motif_scores[sorted$ix])
)
colnames(print_df)[5] <- "%domTPM"
return(print_df)
# Displays as general html text
paste0('--<span style="color:red">', all_genenames$SYMBOL[sorted$ix], '</span> ',
'<a href="https://www.ensembl.org/Danio_rerio/Gene/Summary?db=core;g=',
all_genenames$geneId[sorted$ix], '" target="_blank">',
all_genenames$GENENAME[sorted$ix],
'</a> ',
'<span style="color:green">', form_str(sorted$x), '</span> ',
collapse=" <br/> ")
})
genes <- lapply(genes1, function(x){
DT::datatable(x, escape = c(1), rownames = FALSE, width = 600, height = 1200,
options = list(paging= FALSE),
caption = paste0("TSS info [", nrow(x), " entries] sorted by the gene symbol"))
})
# MatchScores <- unlist(lapply(1:length(perSample_archR_clusts[[sn]]), function(x){
# ## To sort by motif scores, get motif scores first
# motif_scores <- suppressMessages(get_motif_scores(zf_2013_result, perSample_archR_clusts, sn, clust_id = x))
# ##
# sorted <- sort(motif_scores, index.return=TRUE,
# decreasing = TRUE)
# ##
# paste0('<a href="">', form_str(sorted$x),
# '</a>', collapse=", <br/> ")
# }))
#
# message(length(genes), " & ", length(MatchScores))
sn <- sample_names[2]
peakAnno_df <- as.data.frame(perSample_peakAnno[[sn]])
## peakAnno_df here doesn't have columns tpm.dominant_ctss
peakAnno_df$tpm.dominant_ctss <- zf_nepal2013_bed_info[[sn]]$domTPM
unique_genes <- unique(peakAnno_df$SYMBOL)
sumTPM_across_TC_per_gene <- lapply(unique_genes, function(x){
sum(peakAnno_df$tpm.dominant_ctss[which(peakAnno_df$SYMBOL == x)])
})
names(sumTPM_across_TC_per_gene) <- unique_genes
genes1 <- lapply(seq_along(perSample_archR_clusts[[sn]]), function(x){
##
all_genenames <-
peakAnno_df[unlist(perSample_archR_clusts[[sn]][x]), c("GENENAME", "ENTREZID", "geneId", "SYMBOL", "annotation", "width", "tpm.dominant_ctss")]
## To sort by motif scores, get motif scores first
motif_scores <- suppressMessages(get_motif_scores(zf_2013_result, perSample_archR_clusts, sn, clust_id = x))
##
##
anno_terms1 <- all_genenames$annotation
anno_terms_splits <- strsplit(anno_terms1, " ")
anno_terms <- unlist(lapply(anno_terms_splits, function(x){
if(is.na(x[1])){ "NA";
}else if(grepl("Intron", x[1]) || grepl("Exon", x[1])){
paste0(x[1], " (", paste(x[3:6], collapse = " "))
}else if(grepl("Promoter", x[1])){
x[1]
}else{ paste(x[1], x[2]) }
}))
##
sorted <- sort(motif_scores, index.return=TRUE,
decreasing = TRUE)
##
## To sort alphabetically by Gene Symbol
naStr <- unlist(lapply(all_genenames$SYMBOL, function(x){
if(is.na(x)){
"NA"
}else{
x
}
}))
all_genenames$SYMBOL <- naStr
sorted <- sort(all_genenames$SYMBOL, index.return=TRUE)
sam2 <- unlist(lapply(all_genenames$SYMBOL, function(x) {
if(x != "NA"){
sumTPM_across_TC_per_gene[[x]]
}else{
NA
}
}))
##
# Displays as datatable which is sortable, searchable
print_df <- data.frame("Symbol" = all_genenames$SYMBOL[sorted$ix],
"Weblink" =
paste0('<a href="https://www.ensembl.org/Danio_rerio/Gene/Summary?db=core;g=',
all_genenames$geneId[sorted$ix], '" target="_blank">',
all_genenames$GENENAME[sorted$ix],
'</a> '),
"Annot" = anno_terms[sorted$ix],
"domTPM" = format(round(all_genenames$tpm.dominant_ctss[sorted$ix], 3), nsmall=3),
"%%.domTPM" = format(round(100*(all_genenames$tpm.dominant_ctss[sorted$ix]/sam2[sorted$ix]), 2), nsmall=2),
"Match.score"= form_str(motif_scores[sorted$ix])
)
colnames(print_df)[5] <- "%domTPM"
return(print_df)
# Displays as general html text
paste0('--<span style="color:red">', all_genenames$SYMBOL[sorted$ix], '</span> ',
'<a href="https://www.ensembl.org/Danio_rerio/Gene/Summary?db=core;g=',
all_genenames$geneId[sorted$ix], '" target="_blank">',
all_genenames$GENENAME[sorted$ix],
'</a> ',
'<span style="color:green">', form_str(sorted$x), '</span> ',
collapse=" <br/> ")
})
genes <- lapply(genes1, function(x){
DT::datatable(x, escape = c(1), rownames = FALSE, width = 600, height = 1200,
options = list(paging= FALSE),
caption = paste0("TSS info [", nrow(x), " entries] sorted by the gene symbol"))
})
# MatchScores <- unlist(lapply(1:length(perSample_archR_clusts[[sn]]), function(x){
# ## To sort by motif scores, get motif scores first
# motif_scores <- suppressMessages(get_motif_scores(zf_2013_result, perSample_archR_clusts, sn, clust_id = x))
# ##
# sorted <- sort(motif_scores, index.return=TRUE,
# decreasing = TRUE)
# ##
# paste0('<a href="">', form_str(sorted$x),
# '</a>', collapse=", <br/> ")
# }))
#
# # message(length(genes), " & ", length(MatchScores))
sn <- sample_names[3]
peakAnno_df <- as.data.frame(perSample_peakAnno[[sn]])
## peakAnno_df here doesn't have columns tpm.dominant_ctss
peakAnno_df$tpm.dominant_ctss <- zf_nepal2013_bed_info[[sn]]$domTPM
unique_genes <- unique(peakAnno_df$SYMBOL)
sumTPM_across_TC_per_gene <- lapply(unique_genes, function(x){
sum(peakAnno_df$tpm.dominant_ctss[which(peakAnno_df$SYMBOL == x)])
})
names(sumTPM_across_TC_per_gene) <- unique_genes
genes1 <- lapply(1:length(perSample_archR_clusts[[sn]]), function(x){
##
all_genenames <-
peakAnno_df[unlist(perSample_archR_clusts[[sn]][x]), c("GENENAME", "ENTREZID", "geneId", "SYMBOL", "annotation", "width", "tpm.dominant_ctss")]
## To sort by motif scores, get motif scores first
motif_scores <- suppressMessages(get_motif_scores(zf_2013_result, perSample_archR_clusts, sn, clust_id = x))
##
##
anno_terms1 <- all_genenames$annotation
anno_terms_splits <- strsplit(anno_terms1, " ")
anno_terms <- unlist(lapply(anno_terms_splits, function(x){
if(is.na(x[1])){ "NA";
}else if(grepl("Intron", x[1]) || grepl("Exon", x[1])){
paste0(x[1], " (", paste(x[3:6], collapse = " "))
}else if(grepl("Promoter", x[1])){
x[1]
}else{ paste(x[1], x[2]) }
}))
##
sorted <- sort(motif_scores, index.return=TRUE,
decreasing = TRUE)
##
## To sort alphabetically by Gene Symbol
naStr <- unlist(lapply(all_genenames$SYMBOL, function(x){
if(is.na(x)){
"NA"
}else{
x
}
}))
all_genenames$SYMBOL <- naStr
sorted <- sort(all_genenames$SYMBOL, index.return=TRUE)
sam2 <- unlist(lapply(all_genenames$SYMBOL, function(x) {
if(x != "NA"){
sumTPM_across_TC_per_gene[[x]]
}else{
NA
}
}))
##
# Displays as datatable which is sortable, searchable
print_df <- data.frame("Symbol" = all_genenames$SYMBOL[sorted$ix],
"Weblink" =
paste0('<a href="https://www.ensembl.org/Danio_rerio/Gene/Summary?db=core;g=',
all_genenames$geneId[sorted$ix], '" target="_blank">',
all_genenames$GENENAME[sorted$ix],
'</a> '),
"Annot" = anno_terms[sorted$ix],
"domTPM" = format(round(all_genenames$tpm.dominant_ctss[sorted$ix], 3), nsmall=3),
"%%.domTPM" = format(round(100*(all_genenames$tpm.dominant_ctss[sorted$ix]/sam2[sorted$ix]), 2), nsmall=2),
"Match.score"= form_str(motif_scores[sorted$ix])
)
colnames(print_df)[5] <- "%domTPM"
return(print_df)
# Displays as general html text
paste0('--<span style="color:red">', all_genenames$SYMBOL[sorted$ix], '</span> ',
'<a href="https://www.ensembl.org/Danio_rerio/Gene/Summary?db=core;g=',
all_genenames$geneId[sorted$ix], '" target="_blank">',
all_genenames$GENENAME[sorted$ix],
'</a> ',
'<span style="color:green">', form_str(sorted$x), '</span> ',
collapse=" <br/> ")
})
genes <- lapply(genes1, function(x){
DT::datatable(x, escape = c(1), rownames = FALSE, width = 600, height = 1200,
options = list(paging= FALSE),
caption = paste0("TSS info [", nrow(x), " entries] sorted by the gene symbol"))
})
# MatchScores <- unlist(lapply(1:length(perSample_archR_clusts[[sn]]), function(x){
# ## To sort by motif scores, get motif scores first
# motif_scores <- suppressMessages(get_motif_scores(zf_2013_result, perSample_archR_clusts, sn, clust_id = x))
# ##
# sorted <- sort(motif_scores, index.return=TRUE,
# decreasing = TRUE)
# ##
# paste0('<a href="">', form_str(sorted$x),
# '</a>', collapse=", <br/> ")
# }))
suffix_label <- c("X", "Y", "Z")
names(suffix_label) <- sample_names
use_txt_size <- c(20, 20 ,20)
names(use_txt_size) <- sample_names
use_axis_text_size <- 35
use_axis_title_size <- 40
####
if(do_IQW_TPM_plots){
message("Task: Producing IQW_TPM_PhastCons scores combined plot")
for(sn in sample_names){
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
pl_iqw <- get_iqw_ord_plot(iqw = TRUE, y_axis_text = TRUE,
samarth_df = samarth_df[[sn]], text_size = use_axis_title_size,
seqs_clust = seqs_clusters_as_list_ordered[[sn]],
use_suffix = suffix_label[sn], use_notch = FALSE)
# pl_iqw <- pl_iqw + ggeasy::easy_all_text_size(size=18) +
# NULL +
# ggeasy::easy_y_axis_labels_size(size=18) +
# ggeasy::easy_x_axis_labels_size(size=18) +
# NULL
##
pl_tpm <- get_iqw_ord_plot(tpm = TRUE, y_axis_text = FALSE,
samarth_df = samarth_df[[sn]], text_size = use_axis_title_size,
seqs_clust = seqs_clusters_as_list_ordered[[sn]],
use_suffix = suffix_label[sn], use_notch = FALSE)
# pl_tpm <- pl_tpm + ggeasy::easy_all_text_size(size=18) +
# NULL +
# # ggeasy::easy_y_axis_labels_size(size=18) +
# ggeasy::easy_x_axis_labels_size(size=18) +
# NULL
##
# pl_phast <- get_iqw_ord_plot(phast = TRUE, y_axis_text = FALSE,
# samarth_df = samarth_df[[sn]],
# seqs_clust = seqs_clusters_as_list_ordered[[sn]])
# pl_phast <- pl_phast + ggeasy::easy_all_text_size(size=18) +
# NULL +
# # ggeasy::easy_y_axis_labels_size(size=18) +
# ggeasy::easy_x_axis_labels_size(size=18) +
# NULL
## IQW ordered plots
iqw_tpm_ord_pls <- pl_iqw | pl_tpm
# iqw_tpm_phast_ord_pls <- pl_iqw | pl_tpm | pl_phast
perSample_pl[[sn]] <- iqw_tpm_ord_pls #iqw_tpm_phast_ord_pls
# title_str <- paste0("Sample_", sn, "_IQW_TPM_phast_plot")
title_str <- paste0("Sample_", sn, "_IQW_TPM_plot")
pl_w_title <- perSample_pl[[sn]] + plot_annotation(title = title_str)
cowplot::save_plot(filename = file.path(result_dir_path[[sn]],
paste0(title_str, "_paper.pdf")),
plot = perSample_pl[[sn]],
base_height = 10, base_width = 15
)
# cowplot::save_plot(filename = file.path(result_dir_path[[sn]],
# paste0(title_str, ".png")),
# plot = pl_w_title,
# base_height = 17, base_width = 8
# )
# ggsave(filename = file.path(result_dir_path[[sn]],
# paste0(title_str, ".pdf")),
# plot = pl_w_title,
# width = 10, height = 7)
# ggsave(filename = file.path(result_dir_path[[sn]],
# paste0(title_str, ".png")),
# plot = pl_w_title, device = "png", dpi = 300,
# width = 10, height = 7)
# print(pl_w_title)
}
}else{
message("do_iqw_tpm_plot is FALSE")
}
Sequence logos of clusters ordered by their median interquantile width (ascending order). These are included as part of the combined panels. See 64 cells stage, dome 30% epiboly stage and prim6 stage.
#fig.width=11, fig.height=10, out.width="1200px", out.height="800px",
##
if(do_plot_arch){
use_ht <- list(15, 30, 20)
names(use_ht) <- sample_names
for(sn in sample_names){
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
##
message("Generating architectures for clusters of sequences...")
fname <- file.path(result_dir_path[[sn]],
paste0("Architectures_0-max_paper.pdf"))
iter5_zf_arch_list <- lapply(seq_along(perSample_archR_clusts[[sn]]),
function(y){
x <- perSample_archR_clusts[[sn]][[y]]
pl <- archR::plot_ggseqlogo_of_seqs(as.character(zf_2013_result[[sn]]$rawSeqs[x]),
pos_lab = -45:150, bits_yax = "auto", title = NULL, xt_freq = 15)
pl <- pl + theme_classic() +
theme(axis.text = element_text(size=use_axis_text_size),
axis.text.x = element_text(angle=0, vjust = 0.5, hjust = 0.5, size = use_axis_text_size),
axis.text.y = element_text(hjust = -1, size = use_axis_text_size),
# axis.text.y.left = element_text(hjust = -5),
# axis.text.y.left = element_text(vjust = 0.5, hjust = 1.5),
axis.title.y = element_text(size = use_axis_title_size),# axis.line.y.right = element_blank(),
axis.title.y.left = element_text(margin = unit(c(0, 0, 0, 0.1), "cm")),
axis.title.y.right = element_text(margin = unit(c(0, 0, 0, 0.5), "cm")),
axis.ticks.length.y.left = unit(0.15, "cm"),
axis.ticks.length.y.right = unit(0.00, "cm"),
axis.ticks.length.x.bottom = unit(0.2, "cm"),
axis.ticks = element_line(size=0.4),
# plot.margin = unit(c(0,0,0,0), "cm")
plot.margin = unit(c(0,0,0,0.4), "cm")
) +
# theme(axis.text = element_text(size=use_txt_size),
# # axis.text.x = element_text(angle=0, vjust = 2, hjust = 0.5),
# # axis.text.x = element_text(angle=90, vjust = 0, hjust = 0),
# axis.text.y = element_text(vjust = 0.5),
# axis.title.y = element_text(size=use_txt_size),
# axis.ticks.length = unit(0.04, "cm"),
# plot.margin = unit(c(0,0.1,-0.4,0.1), "cm")) +
ggplot2::scale_y_continuous(breaks = seq(0.0, 2.0, by = 0.5),
labels = scales::number_format(accuracy = 0.1),
sec.axis = dup_axis(name = paste0("C",y, suffix_label[sn]),
labels = NULL))
# pl + theme(plot.margin = unit(c(0,0.2,-0.5,0.1), "cm"))
# pl + theme(plot.margin = unit(c(0,0,-0.3,0), "cm"))
if(y == 1){
pl <- pl + theme(plot.margin = unit(c(0,0,0,0.4), "cm"))
}else{
pl <- pl + theme(plot.margin = unit(c(0,0,0,0.4), "cm"))
}
pl
})
## keep x-axis text for only few plots
iter5_zf_arch_list <- lapply(seq_along(iter5_zf_arch_list), function(x){
foo <- iter5_zf_arch_list[[x]] #+ ggplot2::theme(title = element_blank())
if(x != length(iter5_zf_arch_list)){
foo2 <- foo + ggplot2::theme(
axis.text.x.bottom = element_blank(),
axis.ticks.length.y.left = ggplot2::unit(0.15, "cm"),
axis.ticks.length.x.bottom = ggplot2::unit(0.2, "cm"),
plot.margin = ggplot2::unit(c(0.2, 0.3, -0.01, 0.1), "cm"))
return(foo2)
}else{
foo2 <- foo + ggplot2::theme(
plot.margin = ggplot2::unit(c(0.1, 0.3, -1, 0.1), "cm"))
return(foo2)
}
})
######### Zoomed plot for prim6 stage
message("Nb of plots: ", length(iter5_zf_arch_list))
if(sn == "prim6"){
message("====Creating zoomed in plot====")
y <- length(perSample_archR_clusts[[sn]])
x <- perSample_archR_clusts[[sn]][[y]]
##
sub_seqs <- Biostrings::subseq(zf_2013_result[[sn]]$rawSeqs[x],
start = 91,
end = Biostrings::width(zf_2013_result[[sn]]$rawSeqs[1]))
##
pl <- seqArchR::plot_ggseqlogo_of_seqs(sub_seqs,
pos_lab = 45:150, bits_yax = "auto", title = NULL, xt_freq = 5)
zoomed_pl <- pl + theme_classic() +
ggplot2::scale_x_continuous(breaks = seq(1, 106, by = 5),
labels = seq(45, 150, by = 5)) +
ggplot2::scale_y_continuous(breaks = seq(0.0, 0.5, by = 0.05),
labels = scales::number_format(accuracy = 0.05),
sec.axis = dup_axis(name = paste0("C",y, suffix_label[sn]),
labels = NULL)) +
theme(axis.text = element_text(size=use_axis_text_size),
axis.text.x = element_text(angle=0, vjust = 0.5, hjust = 0.5),
axis.text.y = element_text(hjust = -1),
# axis.text.y.left = element_text(hjust = -5),
# axis.text.y.left = element_text(vjust = 0.5, hjust = 1.5),
axis.title.y = element_text(size=use_axis_title_size),# axis.line.y.right = element_blank(),
axis.title.y.left = element_text(margin = unit(c(0, 0, 0, 0.1), "cm")),
axis.title.y.right = element_text(margin = unit(c(0, 0, 0, 0.5), "cm")),
axis.ticks.length.y.left = unit(0.15, "cm"),
axis.ticks.length.y.right = unit(0.00, "cm"),
axis.ticks.length.x.bottom = unit(0.2, "cm"),
axis.ticks = element_line(size=0.4),
# plot.margin = unit(c(0,0,0,0), "cm")
plot.margin = unit(c(0,0,0,0), "cm")
)
}
message("Nb of plots: ", length(iter5_zf_arch_list))
#########
# print(length(iter5_dm_arch_list))
r1c1 <- cowplot::plot_grid(plotlist = iter5_zf_arch_list, ncol = 1,
nrow = length(perSample_archR_clusts[[sn]]))
cowplot::save_plot(fname, plot = r1c1, limitsize = FALSE,
base_height = 2, base_width = 21,
ncol = 1,
nrow = length(perSample_archR_clusts[[sn]]),
dpi = 600)
# cowplot::ggsave2(fname, plot= r1c1, width=25, height=use_ht[[sn]], units="cm",
# dpi = 600, limitsize = FALSE)
# ## save PNGs
# for(p in 1:length(iter5_zf_arch_list)){
# this_result_dir_path <- file.path(result_dir_path[[sn]], "arch_png")
# stopifnot(check_and_create_dir(this_result_dir_path))
# fname <- file.path(this_result_dir_path,
# paste0("Architecture_clust", p, "_0-max.png"))
# cowplot::ggsave2(fname, plot= iter5_zf_arch_list[[p]],
# width=25, height=3, units="cm", dpi = 300)
# }
# r1c1
perSample_arch_combined[[sn]] <- r1c1
perSample_arch[[sn]] <- iter5_zf_arch_list
if(file.exists(fname)){
knitr::include_graphics(fname)
}
}
}else{
message("do_plot_arch is FALSE")
}
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/64_cells_results
## Generating architectures for clusters of sequences...
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Nb of plots: 6
##
## Nb of plots: 6
##
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/dome_30perc_epiboly_results
##
## Generating architectures for clusters of sequences...
##
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Nb of plots: 11
##
## Nb of plots: 11
##
## Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/prim6_results
##
## Generating architectures for clusters of sequences...
##
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Nb of plots: 10
##
## ====Creating zoomed in plot====
##
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Nb of plots: 10
##
overlapTypeInfo <- vector("list", length(sample_names))
names(overlapTypeInfo) <- sample_names
iter_sample_names <- c(1,2,3,1,2)
for(s in seq_along(sample_names)){
this_clusters <- perSample_archR_clusts[[sample_names[s]]]
overlapTypeInfo[[s]] <- data.frame(
clust = rep("0", length(unlist(this_clusters))),
Type = rep("N", length(unlist(this_clusters)))
)
perCluster_overlaps <- vector("list", length(this_clusters))
for(l in seq_along(this_clusters)){
sam <- this_clusters[[l]]
overlapTypeInfo[[s]][sam, "clust"] <- l
## overlapping with next sample/s/s+1
Atemp <- IRanges::overlapsAny(
query = GenomicRanges::promoters(
perSample_CAGEobj[[ sample_names[[iter_sample_names[s]]] ]][sam],
upstream = 45, downstream = 45),
subject = GenomicRanges::promoters(
perSample_CAGEobj[[ sample_names[[iter_sample_names[s+1] ]] ]],
upstream = 45, downstream = 45))
A <- which(Atemp == TRUE)
Anb <- length(A)
# print(A)
# print(Anb)
useIdx <- this_clusters[[l]][A]
overlapTypeInfo[[s]][useIdx, "Type"] <- "1-2"
## overlapping with next to next sample/s/s+2
Btemp <- IRanges::overlapsAny(
query = GenomicRanges::promoters(
perSample_CAGEobj[[ sample_names[[iter_sample_names[s]]] ]][sam],
upstream = 45, downstream = 45),
subject = GenomicRanges::promoters(
perSample_CAGEobj[[ sample_names[[iter_sample_names[s+2] ]] ]],
upstream = 45, downstream = 45))
##
B <- which(Btemp == TRUE)
Bnb <- length(B)
# print(B)
# print(Bnb)
useIdx <- this_clusters[[l]][B]
overlapTypeInfo[[s]][useIdx, "Type"] <- "1-3"
## unique to self
nonA <- which(Atemp == FALSE)
nonB <- which(Btemp == FALSE)
C <- intersect(nonA, nonB)
Cnb <- length(C)
# print(C)
# print(Cnb)
useIdx <- this_clusters[[l]][C]
overlapTypeInfo[[s]][useIdx, "Type"] <- "1"
## Common all
inAll <- intersect(A,B)
Dnb <- length(inAll)
# print(inAll)
# print(Dnb)
useIdx <- this_clusters[[l]][inAll]
overlapTypeInfo[[s]][useIdx, "Type"] <- "1-2-3"
##
# message(paste(length(sam), Anb, Bnb, Cnb, Dnb, sum(Anb,Bnb,Cnb,Dnb), collapse=", "))
# print(Bnb)
# print(Cnb)
perCluster_overlaps[[l]] <- c(Anb, Bnb, Cnb, Dnb)
}
perCluster_overlaps_perSample[[sample_names[s]]] <- do.call(rbind, perCluster_overlaps)
}
use_labs <- list(c("64C", "64C & D", "All", "64C & P6"),
c("D", "D & P6", "All", "64C & D"),
c("P6", "64C & P6", "All", "D & P6")
)
names(use_labs) <- sample_names
bar_pls <- vector("list", length(sample_names))
names(bar_pls) <- sample_names
for(sn in sample_names){
# result_dir_path <- file.path(archR_org_results_path, paste0(sn, "_results"))
# stopifnot(check_and_create_dir(result_dir_path))
clust_labs <- unlist(lapply(seq_along(perSample_archR_clusts[[sn]]),
function(x){
paste0("C", x, suffix_label[[sn]]
# " (", length(perSample_archR_clusts[[sn]][[x]]), ")"
)
}))
bar_pl <- ggplot(overlapTypeInfo[[sn]],
aes(y = fct_rev(reorder(clust, as.numeric(clust))), fill = Type)) +
geom_bar(position = position_fill(reverse=TRUE),
width = 0.6) +
ggplot2::theme_bw() +
ggplot2::scale_fill_discrete(name = "",
labels = use_labs[[sn]]) +
ggplot2::scale_x_continuous("Proportion", limits = c(0,1)) +
# ggplot2::ylab(NULL) +
ggplot2::scale_y_discrete("", position = "left",
labels = rev(clust_labs)) +
ggeasy::easy_legend_at("top") +
ggplot2::theme(legend.text = element_text(size = 10)) +
ggplot2::guides(fill = guide_legend(ncol = 2, byrow = TRUE)) +
ggeasy::easy_all_text_size(size=10)
bar_pls[[sn]] <- bar_pl
}
overlap_grid_pl <- cowplot::plot_grid(plotlist = bar_pls, nrow = 1, align = "h")
cowplot::save_plot(filename = file.path(result_dir_path[[sn]], "zf_figure_showing_overlaps_for_stages.pdf"),
plot = overlap_grid_pl, ncol = 3, nrow = 2,
base_height = 2, base_width = 3, limitsize=FALSE)
cl_char <- c("X", "Y", "Z")
pl_list <- vector("list", length(sample_names))
for(sn in seq_along(sample_names)){
seqArchRplus::per_cluster_strand_dist(sname = sample_names[[sn]],
clusts = perSample_archR_clusts[[sn]],
info_df = samarth_df[[sn]], dir_path = result_dir_path[[sn]])
info_df <- samarth_df[[sn]]
clusts <- perSample_archR_clusts[[sn]]
chr4_df_list <- lapply(seq_along(clusts), function(x){
df_strand <- data.frame(
"chr" = info_df$chr[clusts[[x]]],
"strand" = info_df$strand[clusts[[x]]]
)
df_strand_tab <- table(df_strand)
chr4_df <- data.frame(Chromosome = c("Chr4", "Other"),
Freq = c(0, 0))
total <- sum(df_strand_tab)
chr4_df$Freq[1] <- if("chr4" %in% rownames(df_strand_tab)) sum(df_strand_tab["chr4",]) else 0
chr4_df$Freq[2] <- total - chr4_df$Freq[1]
chr4_df$Proportion <- chr4_df$Freq / total
return(chr4_df)
})
chr4_df <- do.call(rbind, chr4_df_list)
chr4_df$Cluster <- rep(paste0("C", seq(chr4_df_list), cl_char[sn]), each=2)
pl_list[[sn]] <- ggpubr::ggbarplot(chr4_df, x = "Cluster", y = "Proportion",
ylim = c(0,1.0), palette = "Paired2", fill = "Chromosome",
order = paste0("C", rev(seq(chr4_df_list)), cl_char[sn]),
width = 0.7, orientation = "horizontal", legend = "top")
cowplot::ggsave2(filename = file.path(result_dir_path[[sn]],
"Chr4_OtherChr_proportions.pdf"),
plot = pl_list[[sn]], device = "pdf", width = 7,
height = 5)
}
##
## ── All clusters' strand distributions ──────────────────────────────────────────
##
## ── Sample: 64_cells ──
##
## ! Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/64_cells_results/64_cells_results
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
##
##
## ── All clusters' strand distributions ──────────────────────────────────────────
##
##
##
## ── Sample: dome_30perc_epiboly ──
##
##
##
## ! Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/dome_30perc_epiboly_results/dome_30perc_epiboly_results
##
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
##
##
## ── All clusters' strand distributions ──────────────────────────────────────────
##
##
##
## ── Sample: prim6 ──
##
##
##
## ! Directory exists: results/zebrafish-nepal2013/with_archR_v0.1.8/prim6_results/prim6_results
##
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
grid_pl <- cowplot::plot_grid(pl_list[[1]] + ggplot2::theme(legend.position = "none"),
pl_list[[2]] + ggplot2::theme(legend.position = "none"),
pl_list[[3]] + ggplot2::theme(legend.position = "none"),
ncol = 3)
legend <- cowplot::get_legend(
# create some space to the left of the legend
pl_list[[1]] + theme(legend.box.margin = margin(0, 0, 0, 12))
)
grid_pl <- cowplot::plot_grid(legend, grid_pl,
rel_heights = c(0.07,1),
ncol = 1)
cowplot::save_plot(filename = file.path(result_dir_path[[sn]],
paste0("zebrafish_figure_paper_chr4_proportion_all_samples.pdf")),
plot = grid_pl, ncol = 3,
base_height = 5, base_width = 3,
limitsize=FALSE, dpi = 300, device = "pdf")
## fig.width=18, fig.height=12, warning=FALSE,results='hide'
## use cowplot to arrange plots
txdb_ensGene_fname <- file.path(archR_org_data_path, paste0("TxDb.Drerio.UCSC.danRer10.ensGene_self.2021-06-19.sqlite"))
if(!file.exists(txdb_ensGene_fname)){
txdb_ensGene_ucsc <- GenomicFeatures::makeTxDbFromUCSC(genome="danRer10",
tablename="ensGene")
AnnotationDbi::saveDb(txdb_ensGene_ucsc, file= txdb_ensGene_fname)
}else{
txdb_ensGene_ucsc <- AnnotationDbi::loadDb(txdb_ensGene_fname)
}
use_ht <- list(1.5, 1.5, 2)
use_wd <- list(14, 14, 18)
# use_axis_text_size <- 25
# use_axis_title_size <- 30
use_legend_text_size <- list(45, 45, 25)
names(use_legend_text_size) <- sample_names
names(use_ht) <- sample_names
names(use_wd) <- sample_names
panel_pls <- vector("list", length(sample_names))
names(panel_pls) <- sample_names
for(sn in sample_names){
stopifnot(check_and_create_dir(result_dir_path[[sn]]))
clust_labs <- unlist(lapply(seq_along(perSample_archR_clusts[[sn]]),
function(x){
paste0(x, "_(n=", length(perSample_archR_clusts[[sn]][[x]]), ")")
}))
clustwise_anno <- lapply(perSample_archR_clusts[[sn]], function(x){
foo_anno <- ChIPseeker::annotatePeak(perSample_CAGEobj[[sn]][x,],
tssRegion=c(-500, 100),
TxDb = txdb_ensGene_ucsc,
annoDb = "org.Dr.eg.db")
foo_anno
})
names(clustwise_anno) <- seq(1, length(perSample_archR_clusts[[sn]]))
## Solution using custom plotting
features_order <- c("Promoter", "5' UTR", "1st Exon", "Other Exon",
"1st Intron", "Other Intron", "Distal Intergenic", "3' UTR",
"Downstream (<=300)")
##
sam <- dplyr::bind_rows(lapply(clustwise_anno, function(x) x@annoStat), .id = "clust")
colrs <- RColorBrewer::brewer.pal(n = 9, name = "Paired")
levels(sam$Feature) <- features_order
names(colrs) <- features_order
clustwise_annobar <- ggplot(sam,
aes(y = fct_rev(reorder(clust, as.numeric(clust))), x = Frequency, fill = Feature)) +
geom_bar(stat = "identity", width = 0.6, position = position_fill(reverse=TRUE)) +
ggplot2::scale_fill_manual(name = "", values = colrs) +
ggplot2::theme_classic() +
ggplot2::ylab(NULL) +
ggplot2::xlab("Proportion") +
ggplot2::theme(
axis.text.y = element_blank(),
axis.ticks.length.y.left = unit(0.1, units = "cm"),
axis.ticks.length.x.bottom = unit(0.1, units = "cm"),
axis.text.x.bottom = element_text(size = use_axis_text_size),
axis.title.x.bottom = element_text(size = use_axis_title_size),
legend.position = "top",
legend.text = element_text(size = use_legend_text_size[[sn]])
) +
ggplot2::guides(fill = guide_legend(ncol = 2, byrow = TRUE,
legend.box.margin = margin(5, 5, 0, 0)
#legend.margin =margin(r=10,l=10,t=5,b=5),
#legend.key.size = unit(5, "cm")
))
## Solution using ChIPseeker package function
# names(clustwise_anno) <- paste0("C", seq(length(clustwise_anno)), suffix_label[sn])
# clustwise_annobar <- ChIPseeker::plotAnnoBar(clustwise_anno)
# clustwise_annobar <- clustwise_annobar + ggplot2::theme_classic() +
# ggplot2::theme(text=element_text(size=use_txt_size),
# title = NULL,
# legend.position = "bottom",
# axis.text.y.left = element_blank()) +
# ggplot2::ggtitle(NULL) +
# ggplot2::guides(fill = guide_legend(nrow = 3, byrow = TRUE
# ))
if(sn == "prim6"){
## Collect legend for later
legend <- cowplot::get_legend(
# create some space to the left of the legend
clustwise_annobar + theme(legend.box.margin = margin(0, 0, 0, 0))
)
bottom_row <- cowplot::plot_grid(NULL, zoomed_pl, NULL,
nrow = 1, rel_widths = c(0.4, 1, 0.25),
align = "h")
top_row <- cowplot::plot_grid(perSample_pl[[sn]],
perSample_arch_combined[[sn]],
clustwise_annobar + ggplot2::theme(legend.position = "none"),
ncol = 3,
rel_widths = c(0.5, 1, 0.3),
# rel_heights = c(1, 2.5, 1),
rel_heights = c(1, 0.3),
axis = "tb",
align = "h")
panel_pl <- cowplot::plot_grid(top_row,
bottom_row,
nrow = 2,
rel_heights = c(1, 0.1),
axis = "tb",
align = "h"
)
legend_row <- cowplot::plot_grid(NULL, NULL, NULL, legend, nrow = 1)
## With improved legend?
grid_pl <- cowplot::plot_grid(legend_row, panel_pl,
rel_heights = c(0.1, 1), ncol = 1)
cowplot::save_plot(filename = file.path(result_dir_path[[sn]],
paste0("zebrafish_figure_paper_", sn,".pdf")),
plot = grid_pl, ncol = 3, nrow = 8,
base_height = 3, base_width = 14,
limitsize=FALSE, dpi = 300, device = "pdf")
}else{
## Collect legend for later
legend2 <- cowplot::get_legend(
# create some space to the left of the legend
clustwise_annobar + theme(legend.box.margin = margin(0, 0, 0, 0))
)
panel_pl <- cowplot::plot_grid(perSample_pl[[sn]],
perSample_arch_combined[[sn]],
clustwise_annobar + ggplot2::theme(legend.position = "none"),
ncol = 3,
#labels = c('A', 'B'), vjust=1, hjust = 0,
rel_widths = c(0.5, 1, 0.3),
# rel_heights = c(1, 2.5, 1),
rel_heights = c(1),
axis = "tb",
align = "h"
)
cowplot::save_plot(filename = file.path(result_dir_path[[sn]],
paste0("zebrafish_figure_paper_", sn,".pdf")),
plot = panel_pl, ncol = 3, nrow = 10,
base_height = 3, base_width = 20,
limitsize=FALSE, dpi = 300, device = "pdf")
panel_pls[[sn]] <- panel_pl
}
}
## >> preparing features information... 2023-01-09 16:58:21
## >> identifying nearest features... 2023-01-09 16:58:22
## >> calculating distance from peak to TSS... 2023-01-09 16:58:22
## >> assigning genomic annotation... 2023-01-09 16:58:22
## >> adding gene annotation... 2023-01-09 16:58:30
## >> assigning chromosome lengths 2023-01-09 16:58:30
## >> done... 2023-01-09 16:58:30
## >> preparing features information... 2023-01-09 16:58:30
## >> identifying nearest features... 2023-01-09 16:58:30
## >> calculating distance from peak to TSS... 2023-01-09 16:58:30
## >> assigning genomic annotation... 2023-01-09 16:58:30
## >> adding gene annotation... 2023-01-09 16:58:32
## >> assigning chromosome lengths 2023-01-09 16:58:32
## >> done... 2023-01-09 16:58:32
## >> preparing features information... 2023-01-09 16:58:32
## >> identifying nearest features... 2023-01-09 16:58:32
## >> calculating distance from peak to TSS... 2023-01-09 16:58:32
## >> assigning genomic annotation... 2023-01-09 16:58:32
## >> adding gene annotation... 2023-01-09 16:58:33
## >> assigning chromosome lengths 2023-01-09 16:58:33
## >> done... 2023-01-09 16:58:33
## >> preparing features information... 2023-01-09 16:58:33
## >> identifying nearest features... 2023-01-09 16:58:33
## >> calculating distance from peak to TSS... 2023-01-09 16:58:33
## >> assigning genomic annotation... 2023-01-09 16:58:33
## >> adding gene annotation... 2023-01-09 16:58:34
## >> assigning chromosome lengths 2023-01-09 16:58:35
## >> done... 2023-01-09 16:58:35
## >> preparing features information... 2023-01-09 16:58:35
## >> identifying nearest features... 2023-01-09 16:58:35
## >> calculating distance from peak to TSS... 2023-01-09 16:58:35
## >> assigning genomic annotation... 2023-01-09 16:58:35
## >> adding gene annotation... 2023-01-09 16:58:36
## >> assigning chromosome lengths 2023-01-09 16:58:36
## >> done... 2023-01-09 16:58:36
## >> preparing features information... 2023-01-09 16:58:36
## >> identifying nearest features... 2023-01-09 16:58:36
## >> calculating distance from peak to TSS... 2023-01-09 16:58:36
## >> assigning genomic annotation... 2023-01-09 16:58:36
## >> adding gene annotation... 2023-01-09 16:58:37
## >> assigning chromosome lengths 2023-01-09 16:58:38
## >> done... 2023-01-09 16:58:38
## >> preparing features information... 2023-01-09 16:59:04
## >> identifying nearest features... 2023-01-09 16:59:04
## >> calculating distance from peak to TSS... 2023-01-09 16:59:04
## >> assigning genomic annotation... 2023-01-09 16:59:04
## >> adding gene annotation... 2023-01-09 16:59:05
## >> assigning chromosome lengths 2023-01-09 16:59:05
## >> done... 2023-01-09 16:59:06
## >> preparing features information... 2023-01-09 16:59:06
## >> identifying nearest features... 2023-01-09 16:59:06
## >> calculating distance from peak to TSS... 2023-01-09 16:59:06
## >> assigning genomic annotation... 2023-01-09 16:59:06
## >> adding gene annotation... 2023-01-09 16:59:07
## >> assigning chromosome lengths 2023-01-09 16:59:07
## >> done... 2023-01-09 16:59:07
## >> preparing features information... 2023-01-09 16:59:07
## >> identifying nearest features... 2023-01-09 16:59:07
## >> calculating distance from peak to TSS... 2023-01-09 16:59:07
## >> assigning genomic annotation... 2023-01-09 16:59:07
## >> adding gene annotation... 2023-01-09 16:59:08
## >> assigning chromosome lengths 2023-01-09 16:59:08
## >> done... 2023-01-09 16:59:08
## >> preparing features information... 2023-01-09 16:59:08
## >> identifying nearest features... 2023-01-09 16:59:08
## >> calculating distance from peak to TSS... 2023-01-09 16:59:08
## >> assigning genomic annotation... 2023-01-09 16:59:08
## >> adding gene annotation... 2023-01-09 16:59:10
## >> assigning chromosome lengths 2023-01-09 16:59:10
## >> done... 2023-01-09 16:59:10
## >> preparing features information... 2023-01-09 16:59:10
## >> identifying nearest features... 2023-01-09 16:59:10
## >> calculating distance from peak to TSS... 2023-01-09 16:59:10
## >> assigning genomic annotation... 2023-01-09 16:59:10
## >> adding gene annotation... 2023-01-09 16:59:11
## >> assigning chromosome lengths 2023-01-09 16:59:11
## >> done... 2023-01-09 16:59:11
## >> preparing features information... 2023-01-09 16:59:11
## >> identifying nearest features... 2023-01-09 16:59:11
## >> calculating distance from peak to TSS... 2023-01-09 16:59:12
## >> assigning genomic annotation... 2023-01-09 16:59:12
## >> adding gene annotation... 2023-01-09 16:59:13
## >> assigning chromosome lengths 2023-01-09 16:59:13
## >> done... 2023-01-09 16:59:13
## >> preparing features information... 2023-01-09 16:59:13
## >> identifying nearest features... 2023-01-09 16:59:13
## >> calculating distance from peak to TSS... 2023-01-09 16:59:13
## >> assigning genomic annotation... 2023-01-09 16:59:13
## >> adding gene annotation... 2023-01-09 16:59:14
## >> assigning chromosome lengths 2023-01-09 16:59:14
## >> done... 2023-01-09 16:59:14
## >> preparing features information... 2023-01-09 16:59:14
## >> identifying nearest features... 2023-01-09 16:59:14
## >> calculating distance from peak to TSS... 2023-01-09 16:59:15
## >> assigning genomic annotation... 2023-01-09 16:59:15
## >> adding gene annotation... 2023-01-09 16:59:16
## >> assigning chromosome lengths 2023-01-09 16:59:16
## >> done... 2023-01-09 16:59:16
## >> preparing features information... 2023-01-09 16:59:16
## >> identifying nearest features... 2023-01-09 16:59:16
## >> calculating distance from peak to TSS... 2023-01-09 16:59:16
## >> assigning genomic annotation... 2023-01-09 16:59:16
## >> adding gene annotation... 2023-01-09 16:59:17
## >> assigning chromosome lengths 2023-01-09 16:59:17
## >> done... 2023-01-09 16:59:17
## >> preparing features information... 2023-01-09 16:59:17
## >> identifying nearest features... 2023-01-09 16:59:17
## >> calculating distance from peak to TSS... 2023-01-09 16:59:18
## >> assigning genomic annotation... 2023-01-09 16:59:18
## >> adding gene annotation... 2023-01-09 16:59:19
## >> assigning chromosome lengths 2023-01-09 16:59:19
## >> done... 2023-01-09 16:59:19
## >> preparing features information... 2023-01-09 16:59:19
## >> identifying nearest features... 2023-01-09 16:59:19
## >> calculating distance from peak to TSS... 2023-01-09 16:59:19
## >> assigning genomic annotation... 2023-01-09 16:59:19
## >> adding gene annotation... 2023-01-09 16:59:20
## >> assigning chromosome lengths 2023-01-09 16:59:20
## >> done... 2023-01-09 16:59:20
## >> preparing features information... 2023-01-09 16:59:22
## >> identifying nearest features... 2023-01-09 16:59:22
## >> calculating distance from peak to TSS... 2023-01-09 16:59:23
## >> assigning genomic annotation... 2023-01-09 16:59:23
## >> adding gene annotation... 2023-01-09 16:59:24
## >> assigning chromosome lengths 2023-01-09 16:59:24
## >> done... 2023-01-09 16:59:24
## >> preparing features information... 2023-01-09 16:59:24
## >> identifying nearest features... 2023-01-09 16:59:24
## >> calculating distance from peak to TSS... 2023-01-09 16:59:24
## >> assigning genomic annotation... 2023-01-09 16:59:24
## >> adding gene annotation... 2023-01-09 16:59:25
## >> assigning chromosome lengths 2023-01-09 16:59:25
## >> done... 2023-01-09 16:59:25
## >> preparing features information... 2023-01-09 16:59:25
## >> identifying nearest features... 2023-01-09 16:59:25
## >> calculating distance from peak to TSS... 2023-01-09 16:59:26
## >> assigning genomic annotation... 2023-01-09 16:59:26
## >> adding gene annotation... 2023-01-09 16:59:27
## >> assigning chromosome lengths 2023-01-09 16:59:27
## >> done... 2023-01-09 16:59:27
## >> preparing features information... 2023-01-09 16:59:27
## >> identifying nearest features... 2023-01-09 16:59:27
## >> calculating distance from peak to TSS... 2023-01-09 16:59:27
## >> assigning genomic annotation... 2023-01-09 16:59:27
## >> adding gene annotation... 2023-01-09 16:59:28
## >> assigning chromosome lengths 2023-01-09 16:59:28
## >> done... 2023-01-09 16:59:28
## >> preparing features information... 2023-01-09 16:59:28
## >> identifying nearest features... 2023-01-09 16:59:28
## >> calculating distance from peak to TSS... 2023-01-09 16:59:29
## >> assigning genomic annotation... 2023-01-09 16:59:29
## >> adding gene annotation... 2023-01-09 16:59:30
## >> assigning chromosome lengths 2023-01-09 16:59:30
## >> done... 2023-01-09 16:59:30
## >> preparing features information... 2023-01-09 16:59:30
## >> identifying nearest features... 2023-01-09 16:59:30
## >> calculating distance from peak to TSS... 2023-01-09 16:59:30
## >> assigning genomic annotation... 2023-01-09 16:59:30
## >> adding gene annotation... 2023-01-09 16:59:32
## >> assigning chromosome lengths 2023-01-09 16:59:33
## >> done... 2023-01-09 16:59:33
## >> preparing features information... 2023-01-09 16:59:33
## >> identifying nearest features... 2023-01-09 16:59:33
## >> calculating distance from peak to TSS... 2023-01-09 16:59:33
## >> assigning genomic annotation... 2023-01-09 16:59:33
## >> adding gene annotation... 2023-01-09 16:59:34
## >> assigning chromosome lengths 2023-01-09 16:59:34
## >> done... 2023-01-09 16:59:34
## >> preparing features information... 2023-01-09 16:59:34
## >> identifying nearest features... 2023-01-09 16:59:34
## >> calculating distance from peak to TSS... 2023-01-09 16:59:34
## >> assigning genomic annotation... 2023-01-09 16:59:34
## >> adding gene annotation... 2023-01-09 16:59:35
## >> assigning chromosome lengths 2023-01-09 16:59:35
## >> done... 2023-01-09 16:59:35
## >> preparing features information... 2023-01-09 16:59:35
## >> identifying nearest features... 2023-01-09 16:59:36
## >> calculating distance from peak to TSS... 2023-01-09 16:59:36
## >> assigning genomic annotation... 2023-01-09 16:59:36
## >> adding gene annotation... 2023-01-09 16:59:37
## >> assigning chromosome lengths 2023-01-09 16:59:37
## >> done... 2023-01-09 16:59:37
## >> preparing features information... 2023-01-09 16:59:37
## >> identifying nearest features... 2023-01-09 16:59:37
## >> calculating distance from peak to TSS... 2023-01-09 16:59:37
## >> assigning genomic annotation... 2023-01-09 16:59:37
## >> adding gene annotation... 2023-01-09 16:59:38
## >> assigning chromosome lengths 2023-01-09 16:59:38
## >> done... 2023-01-09 16:59:38
## Create a grid for panels from first two stages
stage_combined_panel_pl <- cowplot::plot_grid(panel_pls[[1]], panel_pls[[2]],
nrow = 2,
labels = c('A', 'B'), #vjust=0, hjust = 0,
label_size = use_axis_title_size+10,
rel_heights = c(0.6, 1),
axis = "tb",
align = "h"
)
legend_row <- cowplot::plot_grid(NULL, NULL, NULL, legend2, nrow = 1)
## With improved legend?
grid_pl <- cowplot::plot_grid(legend_row, stage_combined_panel_pl,
rel_heights = c(0.1, 1), ncol = 1)
cowplot::save_plot(filename = file.path(result_dir_path[[sn]], paste0("zebrafish_figure_paper_combined_stage_1and2.pdf")),
plot = grid_pl, ncol = 3, nrow = 15,
base_height = 3, base_width = 18,
limitsize=FALSE, dpi = 300, device = "pdf")
# c1-3,5x
# c4,8y
# c6,7,8, 9z
## no ww signal/WW-box arch
arch_term <- "clusters_with_and_without_downstream_W_signal"
clust_list <- list(## Non-WW enrichment downstream
c(1,2), c(1,3), c(1,5), ## Also has W-box
c(2,4), c(2,8), ## Also has W-box
c(3,7), c(3,8), c(3,9), ##
## WW enrichment downstream
c(1,1), c(1,6), ## Also has W-box
c(2,5), c(2,6), c(2,7), ## Also has W-box
c(3,10) ## only slight
)
go_res <- lapply(clust_list, function(x){
clusterProfiler::enrichGO(perSample_entrezList[[x[1]]][[x[2]]],
OrgDb = org.Dr.eg.db, ont = "all", pool = TRUE,
keyType = "ENTREZID",
pvalueCutoff = 0.05, qvalueCutoff = 0.1)
})
use_names <- paste0("C", unlist(lapply(clust_list, function(x) x[2])),
c(rep("X", 3), rep("Y", 2), rep("Z",3), rep("X", 2), rep("Y", 3), rep("Z",1)))
stopifnot(length(use_names) == length(go_res))
names(go_res) <- use_names
compare_result <- clusterProfiler::merge_result(go_res)
go_term_fs <- 12
show_cats <- c(5, 10)
go_pl <- vector("list", length(show_cats))
for(i in seq_along(show_cats)){
p1 <- dotplot(compare_result, showCategory = show_cats[i],
title = paste0("Comparing ", arch_term, " architectures"),
font.size = go_term_fs)
pl <- p1 + DOSE::theme_dose(font.size = go_term_fs) +
ggplot2::scale_y_discrete(labels = scales::label_wrap(50)) +
ggplot2::scale_color_gradient() +
ggplot2::expand_limits(y = -1) +
ggplot2::annotate("rect", fill = c("red", "blue"), alpha = 0.5,
xmin = c(-Inf, 7.5), xmax = c(7.5,Inf),
ymin = -1, ymax = 0) +
ggplot2::theme(title = element_text(size = go_term_fs),
legend.text = element_text(size = go_term_fs),
legend.title = element_text(size = go_term_fs))
go_pl[[i]] <- pl
cowplot::save_plot(
filename = file.path(result_dir_path[[3]], paste0(arch_term, "_go_plot_top", show_cats[i], "_terms.pdf")),
plot = go_pl[[i]],
base_height = 5, base_width = 8)
}
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
wwbox_go_pl <- go_pl
# go_grid_pl <- cowplot::plot_grid(wwbox_go_pl[[1]], NULL, rel_widths = c(1, 0.4))
comb_pl <- cowplot::plot_grid(grid_pl, wwbox_go_pl[[1]], nrow = 2,
rel_heights = c(0.3, 1),
labels = "AUTO", label_size = 14)
cowplot::save_plot(filename = file.path(result_dir_path[[3]],
paste0("combined_top5_GO_mix_clusts_chr4_prop.pdf")),
plot = comb_pl, base_height = 15, base_width = 14)
comb_pl <- cowplot::plot_grid(wwbox_go_pl[[2]], nrow = 1, labels = "AUTO", label_size = 14)
cowplot::save_plot(filename = file.path(result_dir_path[[3]],
paste0("combined_top10_GO_mix_clusts.pdf")),
plot = comb_pl, base_height = 15, base_width = 14)
## WW signal architecture periodic vs no period across stages:
## collect c9z, c10x and c12y clusters and compare their enriched go terms
arch_term <- "WW_signal"
clust_list <- list(c(1,1), c(1,6),
c(2,6), c(2,7), c(3,10))
go_res <- lapply(clust_list, function(x){
clusterProfiler::enrichGO(perSample_entrezList[[x[1]]][[x[2]]],
OrgDb = org.Dr.eg.db, ont = "all", pool = TRUE,
keyType = "ENTREZID",
pvalueCutoff = 0.05, qvalueCutoff = 0.1)
})
names(go_res) <- paste0("C",
unlist(lapply(clust_list, function(x) x[2])),
c("X", "Y", "Z"))
compare_result <- clusterProfiler::merge_result(go_res)
go_term_fs <- 12
show_cats <- c(5, 10)
go_pl <- vector("list", length(show_cats))
for(i in seq_along(show_cats)){
p1 <- dotplot(compare_result, showCategory = show_cats[i],
title = paste0("Comparing ", arch_term, " architectures"),
font.size = go_term_fs)
pl <- p1 + DOSE::theme_dose(font.size = go_term_fs) +
ggplot2::scale_y_discrete(labels = scales::label_wrap(50)) +
ggplot2::scale_color_gradient() +
ggplot2::theme(title = element_text(size = go_term_fs),
legend.text = element_text(size = go_term_fs),
legend.title = element_text(size = go_term_fs))
go_pl[[i]] <- pl
cowplot::save_plot(
filename = file.path(result_dir_path[[3]], paste0(arch_term, "_go_plot_top", show_cats[i], "_terms.pdf")),
plot = go_pl[[i]],
base_height = 5, base_width = 8)
}
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
ww_go_pl <- go_pl
go_grid_pl <- cowplot::plot_grid(ww_go_pl[[1]], NULL, rel_widths = c(1, 0.4))
comb_pl <- cowplot::plot_grid(grid_pl, go_grid_pl, nrow = 2,
rel_heights = c(0.75, 1),
labels = "AUTO", label_size = 14)
cowplot::save_plot(filename = file.path(result_dir_path[[3]],
paste0("combined_top5_GO_ww_clusts_chr4_prop.pdf")),
plot = comb_pl, base_height = 7, base_width = 10)
comb_pl <- cowplot::plot_grid(ww_go_pl[[2]], nrow = 1, labels = "AUTO", label_size = 14)
cowplot::save_plot(filename = file.path(result_dir_path[[3]],
paste0("combined_top10_GO_ww_clusts.pdf")),
plot = comb_pl, base_height = 8, base_width = 7)
shifting_prom <- readxl::read_excel("/mnt/biggley/home/sarvesh/projects/promoter-architectures-all/clustering-promoter-architectures/promArch_only_for_biorxiv_manuscript/promArch/manuscript/biorxiv/experiments/data/zebrafish-shifting-promoters/41586_2014_BFnature12974_MOESM327_ESM.xls", skip = 1)
cl_char <- c("X", "Y", "Z")
perSample_perClust_shiftingProm <- vector("list", length(sample_names))
for(sn in seq_along(sample_names)){
perSample_perClust_shiftingProm[[sn]] <-
lapply(seq_along(perSample_archR_clusts[[sn]]), function(x){
sam_df <- as.data.frame(perSample_peakAnno[[sn]])
this_entrez <- sam_df$ENTREZID[perSample_archR_clusts[[sn]][[x]]]
shift_entrez <- shifting_prom$entrezgene
## intersections in this cluster
which_present <- intersect(shift_entrez, this_entrez)
message("C", x, cl_char[sn], " = ", length(which_present))
which_present
})
}
## C1X = 94
## C2X = 86
## C3X = 46
## C4X = 36
## C5X = 272
## C6X = 242
## C1Y = 2
## C2Y = 1
## C3Y = 0
## C4Y = 226
## C5Y = 11
## C6Y = 241
## C7Y = 135
## C8Y = 87
## C9Y = 0
## C10Y = 1
## C11Y = 17
## C1Z = 1
## C2Z = 0
## C3Z = 3
## C4Z = 0
## C5Z = 2
## C6Z = 84
## C7Z = 71
## C8Z = 143
## C9Z = 60
## C10Z = 330
pl_list <- vector("list", length(sample_names))
for(sn in seq_along(sample_names)){
sam_df <- data.frame(Clusters =
paste0("C", seq(length(perSample_perClust_shiftingProm[[sn]])), cl_char[sn]),
ShiftingPromoters = lengths(perSample_perClust_shiftingProm[[sn]]))
pl_list[[sn]] <- ggpubr::ggbarplot(sam_df, y = "ShiftingPromoters", x = "Clusters",
label = TRUE, label.pos = "out", x.text.angle = 45, fill = "grey",
ylab = paste0("#Shifting Promoters (", sum(sam_df$ShiftingPromoters), "/911)"))
}
grid_pl2 <- cowplot::plot_grid(plotlist = pl_list, ncol = 3)#, rel_widths = c(0.6, 1, 1))
cowplot::save_plot(filename = file.path(result_dir_path[[sn]],
paste0("zebrafish_figure_paper_shiftingProm_proportion_all_samples.pdf")),
plot = grid_pl2, ncol = 4,
base_height = 4, base_width = 3,
limitsize=FALSE, dpi = 300, device = "pdf")
grid_pl3 <- cowplot::plot_grid(grid_pl, grid_pl2, nrow = 2)
cowplot::save_plot(filename = file.path(result_dir_path[[sn]],
paste0("zebrafish_figure_paper_chr4_prop_shiftingProm_prop_all_samples.pdf")),
plot = grid_pl3, ncol = 4,
base_height = 8, base_width = 3,
limitsize=FALSE, dpi = 300, device = "pdf")
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux bookworm/sid
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_DK.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_DK.UTF-8 LC_COLLATE=en_DK.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_DK.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] DT_0.26
## [2] clusterProfiler_4.6.0
## [3] ChIPseeker_1.34.1
## [4] wesanderson_0.3.6
## [5] cowplot_1.1.1
## [6] patchwork_1.1.2
## [7] forcats_0.5.2
## [8] ggplot2_3.4.0
## [9] reshape2_1.4.4
## [10] readr_2.1.3
## [11] CAGEr_2.2.0
## [12] MultiAssayExperiment_1.23.11
## [13] SummarizedExperiment_1.28.0
## [14] MatrixGenerics_1.10.0
## [15] matrixStats_0.63.0
## [16] ggeasy_0.1.3
## [17] org.Dr.eg.db_3.16.0
## [18] TxDb.Drerio.UCSC.danRer10.refGene_3.4.6
## [19] GenomicFeatures_1.50.2
## [20] AnnotationDbi_1.60.0
## [21] Biobase_2.58.0
## [22] BSgenome.Drerio.UCSC.danRer10_1.4.2
## [23] BSgenome_1.66.1
## [24] rtracklayer_1.58.0
## [25] Biostrings_2.66.0
## [26] XVector_0.38.0
## [27] GenomicRanges_1.50.1
## [28] GenomeInfoDb_1.34.4
## [29] IRanges_2.32.0
## [30] S4Vectors_0.36.1
## [31] BiocGenerics_0.44.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2
## [2] tidyselect_1.2.0
## [3] htmlwidgets_1.5.4
## [4] RSQLite_2.2.19
## [5] grid_4.2.1
## [6] BiocParallel_1.31.14
## [7] scatterpie_0.1.8
## [8] munsell_0.5.0
## [9] ragg_1.2.3
## [10] codetools_0.2-18
## [11] withr_2.5.0
## [12] colorspace_2.0-3
## [13] GOSemSim_2.24.0
## [14] filelock_1.0.2
## [15] highr_0.9
## [16] knitr_1.41
## [17] rstudioapi_0.14
## [18] ggsignif_0.6.4
## [19] DOSE_3.24.2
## [20] labeling_0.4.2
## [21] seqArchR_1.2.0
## [22] GenomeInfoDbData_1.2.9
## [23] polyclip_1.10-4
## [24] seqPattern_1.30.0
## [25] bit64_4.0.5
## [26] farver_2.1.1
## [27] downloader_0.4
## [28] vctrs_0.5.1
## [29] treeio_1.22.0
## [30] generics_0.1.3
## [31] gson_0.0.9
## [32] xfun_0.35
## [33] BiocFileCache_2.6.0
## [34] ggseqlogo_0.1
## [35] R6_2.5.1
## [36] graphlayouts_0.8.3
## [37] archR_0.1.8
## [38] VGAM_1.1-6
## [39] bitops_1.0-7
## [40] cachem_1.0.6
## [41] fgsea_1.24.0
## [42] gridGraphics_0.5-1
## [43] DelayedArray_0.24.0
## [44] assertthat_0.2.1
## [45] BiocIO_1.8.0
## [46] scales_1.2.1
## [47] ggraph_2.1.0
## [48] enrichplot_1.18.3
## [49] gtable_0.3.1
## [50] formula.tools_1.7.1
## [51] tidygraph_1.2.2
## [52] seqLogo_1.63.0
## [53] rlang_1.0.6
## [54] slickR_0.5.0
## [55] systemfonts_1.0.4
## [56] splines_4.2.1
## [57] rstatix_0.7.1
## [58] lazyeval_0.2.2
## [59] broom_1.0.1
## [60] abind_1.4-5
## [61] yaml_2.3.6
## [62] crosstalk_1.2.0
## [63] backports_1.4.1
## [64] qvalue_2.30.0
## [65] tools_4.2.1
## [66] bookdown_0.29
## [67] ggplotify_0.1.0
## [68] ellipsis_0.3.2
## [69] gplots_3.1.3
## [70] jquerylib_0.1.4
## [71] RColorBrewer_1.1-3
## [72] Rcpp_1.0.9
## [73] plyr_1.8.7
## [74] base64enc_0.1-3
## [75] sparseMatrixStats_1.8.0
## [76] progress_1.2.2
## [77] zlibbioc_1.44.0
## [78] purrr_0.3.5
## [79] RCurl_1.98-1.9
## [80] prettyunits_1.1.1
## [81] ggpubr_0.5.0
## [82] viridis_0.6.2
## [83] ggrepel_0.9.2
## [84] cluster_2.1.3
## [85] factoextra_1.0.7
## [86] magrittr_2.0.3
## [87] data.table_1.14.2
## [88] mime_0.12
## [89] hms_1.1.2
## [90] evaluate_0.18
## [91] HDO.db_0.99.1
## [92] XML_3.99-0.13
## [93] readxl_1.4.1
## [94] gridExtra_2.3
## [95] compiler_4.2.1
## [96] biomaRt_2.54.0
## [97] tibble_3.1.8
## [98] KernSmooth_2.23-20
## [99] crayon_1.5.2
## [100] shadowtext_0.1.2
## [101] htmltools_0.5.4
## [102] ggfun_0.0.9
## [103] mgcv_1.8-38
## [104] tzdb_0.3.0
## [105] tidyr_1.2.1
## [106] aplot_0.1.9
## [107] DBI_1.1.3
## [108] tweenr_2.0.2
## [109] dbplyr_2.2.1
## [110] MASS_7.3-57
## [111] rappdirs_0.3.3
## [112] boot_1.3-28
## [113] som_0.3-5.1
## [114] car_3.1-1
## [115] Matrix_1.5-1
## [116] permute_0.9-7
## [117] cli_3.4.1
## [118] gdata_2.18.0.1
## [119] evd_2.3-6.1
## [120] parallel_4.2.1
## [121] igraph_1.3.5
## [122] pkgconfig_2.0.3
## [123] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [124] GenomicAlignments_1.34.0
## [125] hopach_2.57.0
## [126] xml2_1.3.3
## [127] ggtree_3.6.2
## [128] bslib_0.4.0
## [129] stringdist_0.9.8
## [130] yulab.utils_0.0.5
## [131] stringr_1.5.0
## [132] digest_0.6.31
## [133] vegan_2.6-2
## [134] cellranger_1.1.0
## [135] rmarkdown_2.17
## [136] fastmatch_1.1-3
## [137] tidytree_0.4.1
## [138] operator.tools_1.6.3
## [139] dendextend_1.16.0
## [140] seqArchRplus_0.99.8
## [141] DelayedMatrixStats_1.18.0
## [142] restfulr_0.0.15
## [143] curl_4.3.3
## [144] Rsamtools_2.14.0
## [145] gtools_3.9.2.2
## [146] rjson_0.2.21
## [147] lifecycle_1.0.3
## [148] nlme_3.1-158
## [149] jsonlite_1.8.4
## [150] carData_3.0-5
## [151] viridisLite_0.4.1
## [152] fansi_1.0.3
## [153] pillar_1.8.1
## [154] lattice_0.20-45
## [155] KEGGREST_1.38.0
## [156] fastmap_1.1.0
## [157] httr_1.4.4
## [158] plotrix_3.8-2
## [159] GO.db_3.16.0
## [160] glue_1.6.2
## [161] PWMEnrich_4.33.0
## [162] png_0.1-8
## [163] bit_4.0.5
## [164] ggforce_0.4.1
## [165] stringi_1.7.8
## [166] sass_0.4.2
## [167] blob_1.2.3
## [168] textshaping_0.3.6
## [169] caTools_1.18.2
## [170] memoise_2.0.1
## [171] dplyr_1.0.10
## [172] ape_5.6-2